Re: [Intel-gfx] [PATCH 2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

2016-10-20 Thread Sharma, Shashank

Regards

Shashank


On 10/21/2016 12:50 AM, Imre Deak wrote:

On Thu, 2016-10-20 at 21:20 +0300, Jani Nikula wrote:

On Thu, 20 Oct 2016, Imre Deak  wrote:

On my APL the LSPCON firmware resumes in PCON mode as opposed to the
expected LS mode. It also appears to be in a state where AUX DPCD reads
will succeed but return garbage recovering only after a few hundreds of
milliseconds. After the recovery time DPCD reads will result in the
correct values and things will continue to work. If I2C over AUX is
attempted during this recovery time (implying an AUX write transaction)
the firmware won't recover and will stay in this broken state.

As a workaround check if the firmware is in PCON state after resume and
if so wait until the correct DPCD values are returned. For this we
compare the branch descriptor with the one we cached during init time.
If the firmware was in the LS state, we skip the w/a and continue as
before.

Cc: Shashank Sharma 
Cc: Ville Syrjälä 
Cc: Jani Nikula 
Signed-off-by: Imre Deak 
---
  drivers/gpu/drm/i915/intel_dp.c |  2 +-
  drivers/gpu/drm/i915/intel_drv.h|  6 -
  drivers/gpu/drm/i915/intel_lspcon.c | 52 ++---
  3 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index e90211e..ec031db 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
intel_dp->DP = DP;
  }
  
-static bool

+bool
  intel_dp_read_dpcd(struct intel_dp *intel_dp)
  {
if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a35e241..9a2366e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -972,7 +972,9 @@ struct intel_dp {
  struct intel_lspcon {
bool active;
enum drm_lspcon_mode mode;
-   struct drm_dp_aux *aux;
+   struct intel_dp *intel_dp;
IMHO, it's not required to have the intel_dp inside lspcon. We can always 
get intel_dig_port from lspcon, and intel_dp from intel_dig_port.
The reason why I kept aux here was that it's the only thing required to 
read/write from/to lspcon.

+   bool desc_valid;
+   struct intel_dp_desc desc;

I guess we could cache the desc in intel_dp directly. Independent of
this patch.

It's not used anywhere else, but I can move it to intel_dp.


Also, I'm wondering if we could stick with just aux here, and read
something else from dpcd instead.

Not sure either, I picked desc since we read it out anyway during init.


  };
  
  struct intel_digital_port {

@@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int 
lane_count)
  }
  
  bool

+intel_dp_read_dpcd(struct intel_dp *intel_dp);
+bool
  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
  void
  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c 
b/drivers/gpu/drm/i915/intel_lspcon.c
index d2c8cb2..54c6173 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -30,7 +30,7 @@
  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon 
*lspcon)
  {
enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
-   struct i2c_adapter *adapter = &lspcon->aux->ddc;
+   struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
  
  	if (drm_lspcon_get_mode(adapter, &current_mode))

DRM_ERROR("Error reading LSPCON mode\n");
@@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
  {
int err;
enum drm_lspcon_mode current_mode;
-   struct i2c_adapter *adapter = &lspcon->aux->ddc;
+   struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
  
  	err = drm_lspcon_get_mode(adapter, &current_mode);

if (err) {
@@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
  static bool lspcon_probe(struct intel_lspcon *lspcon)
  {
enum drm_dp_dual_mode_type adaptor_type;
-   struct i2c_adapter *adapter = &lspcon->aux->ddc;
+   struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
  
  	/* Lets probe the adaptor and check its type */

adaptor_type = drm_dp_dual_mode_detect(adapter);
@@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
return true;
  }
  
+static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)

+{
+   unsigned long start = jiffies;
+
+   if (!lspcon->desc_valid)
+   return;
+
+   while (1) {
+   struct intel_dp_desc desc;
+
+   /*
+* The w/a only applies in PCON mode and we don't expect any
+* AUX errors.
+*/
+   if 

[Intel-gfx] ✗ Fi.CI.BAT: warning for drm/i915: GVT-g driver depends on 64BIT kernel

2016-10-20 Thread Patchwork
== Series Details ==

Series: drm/i915: GVT-g driver depends on 64BIT kernel
URL   : https://patchwork.freedesktop.org/series/14143/
State : warning

== Summary ==

Series 14143v1 drm/i915: GVT-g driver depends on 64BIT kernel
https://patchwork.freedesktop.org/api/1.0/series/14143/revisions/1/mbox/

Test drv_module_reload_basic:
dmesg-warn -> PASS   (fi-skl-6700hq)
Test gem_exec_suspend:
Subgroup basic-s3:
pass   -> DMESG-WARN (fi-skl-6700hq)
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-a:
pass   -> DMESG-WARN (fi-skl-6700hq)
Subgroup suspend-read-crc-pipe-b:
pass   -> DMESG-WARN (fi-skl-6700hq)
Subgroup suspend-read-crc-pipe-c:
pass   -> DMESG-WARN (fi-skl-6700hq)

fi-bdw-5557u total:246  pass:231  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:246  pass:204  dwarn:0   dfail:0   fail:0   skip:42 
fi-bxt-t5700 total:246  pass:216  dwarn:0   dfail:0   fail:0   skip:30 
fi-byt-j1900 total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-byt-n2820 total:246  pass:211  dwarn:0   dfail:0   fail:0   skip:35 
fi-hsw-4770  total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-hsw-4770r total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-ilk-650   total:246  pass:185  dwarn:0   dfail:0   fail:1   skip:60 
fi-ivb-3520m total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:25 
fi-ivb-3770  total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:25 
fi-kbl-7200u total:246  pass:222  dwarn:0   dfail:0   fail:0   skip:24 
fi-skl-6260u total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hqtotal:246  pass:219  dwarn:4   dfail:0   fail:0   skip:23 
fi-skl-6700k total:246  pass:221  dwarn:1   dfail:0   fail:0   skip:24 
fi-skl-6770hqtotal:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:246  pass:210  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600  total:246  pass:209  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2780/

8d34fff02efad9abfc12cace4c347eaa1c3804f7 drm-intel-nightly: 
2016y-10m-20d-21h-52m-44s UTC integration manifest
6f93dee drm/i915: GVT-g driver depends on 64BIT kernel

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: GVT-g driver depends on 64BIT kernel

2016-10-20 Thread Zhenyu Wang
We currently don't support GVT-g driver on i386 kernel.
Add explicit dependence on 64bit kernel.

Signed-off-by: Zhenyu Wang 
---
 drivers/gpu/drm/i915/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 6aedc96..c72b007 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -86,6 +86,7 @@ config DRM_I915_USERPTR
 config DRM_I915_GVT
 bool "Enable Intel GVT-g graphics virtualization host support"
 depends on DRM_I915
+depends on 64BIT
 default n
 help
  Choose this option if you want to enable Intel GVT-g graphics
-- 
2.9.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/gvt: Implement WaForceWakeRenderDuringMmioTLBInvalidate

2016-10-20 Thread Zhenyu Wang
On 2016.10.20 17:20:04 +0200, Arkadiusz Hiler wrote:
> On Thu, Oct 20, 2016 at 05:29:36PM +0300, Mika Kuoppala wrote:
> > Arkadiusz Hiler  writes:
> > 
> > > When invalidating RCS TLB the device can enter RC6 state interrupting
> > > the process, therefore the need for render forcewake for the whole
> > > procedure.
> > >
> > > This WA is needed for all production SKL SKUs.
> > >
> > > References: HSD#2136899, HSD#1404391274
> > > Cc: Mika Kuoppala 
> > > Cc: Zhenyu Wang 
> > > Signed-off-by: Arkadiusz Hiler 
> > > ---
> > >  drivers/gpu/drm/i915/gvt/render.c | 11 +++
> > >  1 file changed, 11 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gvt/render.c 
> > > b/drivers/gpu/drm/i915/gvt/render.c
> > > index f54ab85..f5000ea 100644
> > > --- a/drivers/gpu/drm/i915/gvt/render.c
> > > +++ b/drivers/gpu/drm/i915/gvt/render.c
> > > @@ -134,11 +134,22 @@ static void handle_tlb_pending_event(struct 
> > > intel_vgpu *vgpu, int ring_id)
> > >  
> > >   reg = _MMIO(regs[ring_id]);
> > >
> > 
> > Ok not so familiar with the gvt side but I assume this is the host
> > side code and thus the vgpu is not active at this stage.
> 
> That's my understanding as well. It's a code that is setting up gvt for
> further use (shadow context to be exact). It's called indirectly from
> intel_gvt_create_vgpu.
> 

yes, it's for host not for vgpu to handle context switch state for vgpu.

> > Then you could avoid some of the implicit fw dancing
> > by:
> > 
> > diff --git a/drivers/gpu/drm/i915/gvt/render.c 
> > b/drivers/gpu/drm/i915/gvt/render.c
> > index feebb65..93ba156 100644
> > --- a/drivers/gpu/drm/i915/gvt/render.c
> > +++ b/drivers/gpu/drm/i915/gvt/render.c
> > @@ -118,6 +118,7 @@ static u32 gen9_render_mocs_L3[32];
> >  static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
> >  {
> > struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
> > +   enum forcewake_domains fw;
> > i915_reg_t reg;
> > u32 regs[] = {
> > [RCS] = 0x4260,
> > @@ -135,11 +136,21 @@ static void handle_tlb_pending_event(struct 
> > intel_vgpu *vgpu, int ring_id)
> >  
> > reg = _MMIO(regs[ring_id]);
> >  
> > -   I915_WRITE(reg, 0x1);
> > +   fw = intel_uncore_forcewake_for_reg(dev_priv, reg,
> > +   FW_REG_READ | FW_REG_WRITE);
> >  
> > -   if (wait_for_atomic((I915_READ(reg) == 0), 50))
> > +   if (ring_id == RCS && IS_SKYLAKE(dev_priv))
> > +   fw |= FORCEWAKE_RENDER;
> > +
> > +   intel_uncore_forcewake_get(dev_priv, fw);
> > +
> > +   I915_WRITE_FW(reg, 0x1);
> > +
> > +   if (wait_for_atomic((I915_READ_FW(reg) == 0), 50))
> > gvt_err("timeout in invalidate ring (%d) tlb\n", ring_id);
> >  
> > +   intel_uncore_forcewake_put(dev_priv, fw);
> > +
> > 
> 
> I can go with it, although I do not have strong preference. I think my
> version is a little bit easier to follow, but his is less error prone,
> as you check for the WA SKU only once, during setting the FW.
> 
> Any recommendations?
> 

I like this one to be safer. Would Mika like to send another one or I
just take your commit message?

thanks

-- 
Open Source Technology Center, Intel ltd.

$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827


signature.asc
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3] Idleness DRRS test

2016-10-20 Thread Nautiyal Ankit
From: Ramalingam C 

Idleness DRRS:
By default the DRRS state will be at DRRS_HIGH_RR. When a Display
content is Idle for more than 1Sec Idleness will be declared and
DRRS_LOW_RR will be invoked, changing the refresh rate to the
lower most refresh rate supported by the panel. As soon as there
is a display content change there will be a DRRS state transition
as DRRS_LOW_RR--> DRRS_HIGH_RR, changing the refresh rate to the
highest refresh rate supported by the panel.

To test this, Idleness DRRS IGT will probe the DRRS state at below
instances and compare with the expected state.

Instance                                        Expected State
1. Immediately after rendering the still image  DRRS_HIGH_RR
2. After a delay of 1.2Sec  DRRS_LOW_RR
3. After changing the frame buffer  DRRS_HIGH_RR
4. After a delay of 1.2Sec  DRRS_LOW_RR
5. After changing the frame buffer  DRRS_HIGH_RR
6. After a delay of 1.2Sec  DRRS_LOW_RR

The test checks the driver DRRS state from the debugfs entry. To check the
actual refresh-rate, a separate thread counts the number of vblanks
received per sec. The refresh-rate calculated is checked against the
expected refresh-rate with a tolerance value of 2.

This patch is a continuation of the earlier work
https://patchwork.freedesktop.org/patch/45472/ towards igt for idleness

DRRS. The code is tested on Broxton BXT_T platform.

v2: Addressed the comments and suggestions from Vlad, Marius.
The signoff details from the earlier work are also included.

v3: Modified vblank rate calculation by using reply-sequence, provided by
drmWaitVBlank, as suggested by Chris Wilson.

Signed-off-by: Ramalingam C 
Signed-off-by: Vandana Kannan 
Signed-off-by: aknautiy 
---
 tests/Makefile.sources |   1 +
 tests/kms_drrs.c   | 599 +
 2 files changed, 600 insertions(+)
 create mode 100644 tests/kms_drrs.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index a837977..5f31521 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -91,6 +91,7 @@ TESTS_progs_M = \
kms_cursor_crc \
kms_cursor_legacy \
kms_draw_crc \
+   kms_drrs \
kms_fbc_crc \
kms_fbcon_fbt \
kms_flip \
diff --git a/tests/kms_drrs.c b/tests/kms_drrs.c
new file mode 100644
index 0000000..bd5a135
--- /dev/null
+++ b/tests/kms_drrs.c
@@ -0,0 +1,599 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmtest.h"
+#include "igt_debugfs.h"
+#include "igt_kms.h"
+#include "intel_chipset.h"
+#include "intel_batchbuffer.h"
+#include "ioctl_wrappers.h"
+#include 
+#include 
+#include 
+#include 
+IGT_TEST_DESCRIPTION(
+"Performs write operations and then waits for DRRS to invoke the"
+"Low Refresh Rate and then disturbs the contents of the screen once"
+"again hence DRRS revert back to High Refresh Rate(Default).");
+
+#define DRRS_STATUS_BYTES_CNT  1000
+#define SET    1
+#define RESET  0
+
+/*
+ * Structure to store data to create 2 framebuffers, fb[0] and fb[1] on a given
+ * display. To disturb the content of the screen, we replace fb[0] by fb[1] and
+ * vice versa.
+ */
+typedef struct {
+   int drm_fd;
+   uint32_t devid;
+   uint32_t handle[2];
+   igt_display_t display;
+   igt_output_t *output;
+   enum pipe pipe;
+   igt_plane_t *primary;
+   struct igt_fb fb[2];
+   uint32_t fb_id[2];
+} data_t;
+
+/*
+ * Structure to count vblank and note the starting time of the counter
+ */
+typedef struct {
+   unsigned int vbl_count;
+   struct timeval start;
+} 

Re: [Intel-gfx] linux-next: Tree for Oct 20 (gpu/drm/i915)

2016-10-20 Thread Zhenyu Wang
On 2016.10.20 21:25:03 +0300, Jani Nikula wrote:
> On Thu, 20 Oct 2016, Daniel Vetter  wrote:
> > On Thu, Oct 20, 2016 at 7:37 PM, Randy Dunlap  wrote:
> >> On 10/19/16 20:20, Stephen Rothwell wrote:
> >>> Hi all,
> >>>
> >>> Changes since 20161019:
> >>>
> >>
> >> on i386: when CONFIG_ACPI is not enabled:
> >
> > Adding Zhenyu. Might be good to have a fix just for this that I
> > directly pick up, since I want to tag the first 4.10 pull for Dave
> > Airlie this w/e.
> 
> How about just this?
>

I'd like to not depend on acpi function any more, so just change that
for memremap. GVT-g driver currently only supports 64BIT kernel so will
fix that dependence. I'll send fix pull for Daniel later.

thanks

> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 6aedc96aa412..94914381e8ef 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -85,7 +85,7 @@ config DRM_I915_USERPTR
>  
>  config DRM_I915_GVT
>  bool "Enable Intel GVT-g graphics virtualization host support"
> -depends on DRM_I915
> +depends on DRM_I915 && ACPI
>  default n
>  help
> Choose this option if you want to enable Intel GVT-g graphics
> 
> 
> 
> > -Daniel
> >
> >> ../drivers/gpu/drm/i915/gvt/opregion.c: In function 
> >> 'intel_gvt_init_opregion':
> >> ../drivers/gpu/drm/i915/gvt/opregion.c:183:2: error: implicit declaration 
> >> of function 'acpi_os_ioremap' [-Werror=implicit-function-declaration]
> >>   gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa,
> >>   ^
> >> ../drivers/gpu/drm/i915/gvt/opregion.c:183:28: warning: assignment makes 
> >> pointer from integer without a cast [enabled by default]
> >>   gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa,
> >> ^
> >> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'read_pte64':
> >> ../drivers/gpu/drm/i915/gvt/gtt.c:277:2: warning: left shift count >= 
> >> width of type [enabled by default]
> >>   pte |= ioread32(addr + 4) << 32;
> >>   ^
> >> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gtt_get_pfn':
> >> ../drivers/gpu/drm/i915/gvt/gtt.c:360:3: warning: left shift count >= 
> >> width of type [enabled by default]
> >>pfn = (e->val64 & ADDR_4K_MASK) >> 12;
> >>^
> >> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gtt_set_pfn':
> >> ../drivers/gpu/drm/i915/gvt/gtt.c:373:3: warning: left shift count >= 
> >> width of type [enabled by default]
> >>e->val64 &= ~ADDR_4K_MASK;
> >>^
> >> ../drivers/gpu/drm/i915/gvt/gtt.c:374:3: warning: left shift count >= 
> >> width of type [enabled by default]
> >>pfn &= (ADDR_4K_MASK >> 12);
> >>^
> >> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gma_to_pml4_index':
> >> ../drivers/gpu/drm/i915/gvt/gtt.c:436:1: warning: right shift count >= 
> >> width of type [enabled by default]
> >>  DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
> >>  ^
> >>   CC  drivers/gpu/drm/radeon/si_smc.o
> >> In file included from ../drivers/gpu/drm/i915/i915_drv.h:46:0,
> >>  from ../drivers/gpu/drm/i915/gvt/gtt.c:36:
> >> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 
> >> 'intel_gvt_create_scratch_page':
> >> ../drivers/gpu/drm/i915/gvt/gtt.c:1945:47: warning: cast from pointer to 
> >> integer of different size [-Wpointer-to-int-cast]
> >>gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
> >>^
> >> ../include/drm/drmP.h:201:43: note: in definition of macro 'DRM_ERROR'
> >>   drm_printk(KERN_ERR, DRM_UT_NONE, fmt, ##__VA_ARGS__)
> >>^
> >> ../drivers/gpu/drm/i915/gvt/gtt.c:1945:3: note: in expansion of macro 
> >> 'gvt_err'
> >>gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
> >>^
> >>
> >>
> >>
> >> --
> >> ~Randy
> >> ___
> >> Intel-gfx mailing list
> >> Intel-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Jani Nikula, Intel Open Source Technology Center

-- 
Open Source Technology Center, Intel ltd.

$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827


signature.asc
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] Redo a modeset on link training failure

2016-10-20 Thread Manasi Navare
Hi Ville,

I have implemented the code that we discussed where if the link training
fails, it would validate the modes on the new constraints and call
an atomic helper like drm_atomic_helper_connector_modeset() to redo
a modeset for the same mode. The two patches for this implementation
are:

http://paste.ubuntu.com/23357104/
http://paste.ubuntu.com/23357105/

With this I can successfully trigger the modeset and retrain the link
at lower link rate. But I am getting a warning during intel_audio_codec_enable()
in intel_enable_ddi() during the commit phase on SKL.
Following is the dmesg log:

http://paste.ubuntu.com/23357075/

After further looking at it, I see that this calls drm_select_eld() function
that throws a warning if the mode_config mutex and modeset locks are held.
If I remove those WARN_ONs from there, I can get rid of this warning and
everything works smoothly.

Do you know if those WARN_ONs are required because these locks would be grabbed
when we are in modeset?

Regards
Manasi
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [i-g-t PATCH v2] igt/tools: Update intel_watermark with SKL support

2016-10-20 Thread Pandiyan, Dhinakaran
Cc'ing reviewers


On Tue, 2016-10-18 at 17:05 -0700, Dhinakaran Pandiyan wrote:
> Added support to print SKL watermark and DDB registers.
> 
> v2: Printed raw register data, renamed planes and combined two printf()'s
> (Ville)
> 
> Signed-off-by: Dhinakaran Pandiyan 
> ---
>  tools/intel_watermark.c | 150 
> +++-
>  1 file changed, 149 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/intel_watermark.c b/tools/intel_watermark.c
> index e9a2b05..81e394c 100644
> --- a/tools/intel_watermark.c
> +++ b/tools/intel_watermark.c
> @@ -120,6 +120,11 @@ static const char *endis(bool enabled)
>   return enabled ? "enabled" : "disabled";
>  }
>  
> +static const char endis_ast(bool enabled)
> +{
> + return enabled ? '*' : ' ';
> +}
> +
>  static int is_gen7_plus(uint32_t d)
>  {
>   return !(IS_GEN5(d) || IS_GEN6(d));
> @@ -130,6 +135,147 @@ static int is_hsw_plus(uint32_t d)
>   return !(IS_GEN5(d) || IS_GEN6(d) || IS_IVYBRIDGE(d));
>  }
>  
> +
> +static void skl_wm_dump(void)
> +{
> + int pipe, plane, level;
> + int num_pipes = 3;
> + int num_planes = 5;
> + int num_levels = 8;
> + uint32_t base_addr = 0x70000, addr, wm_offset;
> + uint32_t wm[num_levels][num_pipes][num_planes];
> + uint32_t wm_trans[num_pipes][num_planes];
> + uint32_t buf_cfg[num_pipes][num_planes];
> + char reg_name[20];
> +
> + intel_register_access_init(intel_get_pci_device(), 0);
> +
> + for (pipe = 0; pipe < num_pipes; pipe++) {
> + for (plane = 0; plane < num_planes; plane++) {
> + addr =  base_addr +  pipe * 0x1000 + plane * 0x100;
> +
> + wm_trans[pipe][plane] = read_reg(addr + 0x00168);
> + buf_cfg[pipe][plane] = read_reg(addr + 0x0017C);
> + for (level = 0; level < num_levels; level++) {
> + wm_offset = addr + 0x00140 + level * 0x4;
> + wm[level][pipe][plane] = read_reg(wm_offset);
> + }
> + }
> + }
> +
> + for (plane = 0; plane < num_planes; plane++) {
> + for (level = 0; level < num_levels; level++) {
> + for (pipe = 0; pipe < num_pipes; pipe++) {
> + if (plane == 0)
> + snprintf(reg_name, sizeof(reg_name), 
> "%s_WM_%c_%1d","CUR",
> +  pipe_name(pipe), level);
> + else
> + snprintf(reg_name, sizeof(reg_name), 
> "%s_WM_%1d_%c_%1d","PLANE",
> +  plane, pipe_name(pipe), level);
> +
> + printf("%-19s %8x\t\t" , reg_name, 
> wm[level][pipe][plane]);
> + }
> + printf("\n");
> + }
> + printf("\n");
> + }
> +
> + for (plane = 0; plane < num_planes; plane++) {
> + for (pipe = 0; pipe < num_pipes; pipe++) {
> + if (plane == 0)
> + snprintf(reg_name, sizeof(reg_name), 
> "%s_WM_TRANS_%c", "CUR",
> +  pipe_name(pipe));
> + else
> + snprintf(reg_name, sizeof(reg_name), 
> "%s_WM_TRANS_%1d_%c", "PLANE",
> +  plane, pipe_name(pipe));
> +
> + printf("%-19s %8x\t\t", reg_name, 
> wm_trans[pipe][plane]);
> +
> + }
> + printf("\n");
> + }
> + printf("\n");
> +
> + for (plane = 0; plane < num_planes; plane++) {
> + for (pipe = 0; pipe < num_pipes; pipe++) {
> + if (plane == 0)
> + snprintf(reg_name, sizeof(reg_name), 
> "%s_BUF_CFG_%c", "CUR",
> +  pipe_name(pipe));
> + else
> + snprintf(reg_name, sizeof(reg_name), 
> "%s_BUF_CFG_%1d_%c", "PLANE",
> +  plane, pipe_name(pipe));
> +
> + printf("%-19s %8x\t\t", reg_name, 
> buf_cfg[pipe][plane]);
> + }
> + printf("\n");
> + }
> + printf("\n");
> +
> + for (pipe = 0; pipe < num_pipes; pipe++) {
> + uint32_t start, end, size;
> + uint32_t lines, blocks, enable;
> +
> + printf("PIPE_%c\n", pipe_name(pipe));
> + printf("LEVEL   CURSOR   PLANE_1   PLANE_2   PLANE_3   
> PLANE_4\n");
> + for (level = 0; level < num_levels; level++) {
> + printf("%5d  ", level);
> + for (plane = 0; plane < num_planes; plane++) {
> + blocks = 

Re: [Intel-gfx] [PATCH] drm/i915/dp: Increase cdclk when DP audio is enabled with 4 lanes and HBR2

2016-10-20 Thread Pandiyan, Dhinakaran
On Mon, 2016-10-17 at 11:33 +0300, Ville Syrjälä wrote:
> On Fri, Oct 14, 2016 at 08:33:37PM +0000, Pandiyan, Dhinakaran wrote:
> > On Thu, 2016-10-13 at 21:44 +0300, Ville Syrjälä wrote:
> > > On Thu, Oct 13, 2016 at 11:04:19AM -0700, Dhinakaran Pandiyan wrote:
> > > > According to BSpec, cdclk has to be not less than 432 MHz with DP audio
> > > > enabled, port width x4, and link rate HBR2 (5.4 GHz)
> > > > 
> > > > Having a lower cdclk triggers pipe underruns, which then lead to 
> > > > displays
> > > > continuously cycling off and on. This is essential for DP MST audio as 
> > > > the
> > > > link is trained at HBR2 and 4 lanes by default.
> > > > 
> > > > This should fix https://bugs.freedesktop.org/show_bug.cgi?id=97907
> > > > 
> > > > Signed-off-by: Dhinakaran Pandiyan 
> > > > ---
> > > >  drivers/gpu/drm/i915/intel_display.c | 47 
> > > > +---
> > > >  1 file changed, 43 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/intel_display.c 
> > > > b/drivers/gpu/drm/i915/intel_display.c
> > > > index cfcb03f..6a05183 100644
> > > > --- a/drivers/gpu/drm/i915/intel_display.c
> > > > +++ b/drivers/gpu/drm/i915/intel_display.c
> > > > @@ -6603,14 +6603,43 @@ static int valleyview_modeset_calc_cdclk(struct 
> > > > drm_atomic_state *state)
> > > > return 0;
> > > >  }
> > > >  
> > > > +static bool cdclk_min_for_dp_audio(struct drm_atomic_state *state)
> > > > +{
> > > > +
> > > > +   struct drm_crtc_state *crtc_state;
> > > > +   struct drm_crtc *crtc;
> > > > +   int i;
> > > > +
> > > > +   /* BSpec says "Do not use DisplayPort with CDCLK less than 432 
> > > > MHz,
> > > > +* audio enabled, port width x4, and link rate HBR2 (5.4 GHz), 
> > > > or else
> > > > +* there may be audio corruption or screen corruption."
> > > > +*/
> > > > +
> > > > +   for_each_crtc_in_state(state, crtc, crtc_state, i) {
> > > > +   struct intel_crtc_state *pipe_config =
> > > > +   to_intel_crtc_state(crtc_state);
> > > > +
> > > > +   return (intel_crtc_has_dp_encoder(pipe_config) &&
> > > > +   pipe_config->has_audio &&
> > > > > +   pipe_config->port_clock == 540000 &&
> > > > +   pipe_config->lane_count == 4);
> > > > +   }
> > > 
> > > That's not good enough on account of crtcs not part of the state
> > > potentially needing the workaround as well. However, since we only do
> > > this when there's a modeset, I think we'll be covered by the
> > > connection_mutex, and so we should be able to peek at the current state
> > > of all crtcs without grabbing the corresponding crtc locks.
> > > 
> > 
> > Please correct me if I am wrong. Won't the first modeset that has all
> > the conditions met (DP + HBR2 + 4 lanes + audio) include the crtc
> > driving the display which triggered the modeset?
> > 
> > Since, the new cdclk freq that will be set is common for all the crtcs,
> > we don't need the workaround for crtcs that are not in state. 
> 
> There can be another modeset afterwards that doesn't need the w/a and
> that would then end up reducing cdclk below the required frequency.
> 

Got it, thanks for the explanation.


> > 
> > > So I think we'd be OK with something like:
> > > 
> > > WARN_ON(!locked(connection_mutex));
> > > 
> > > for_each_intel_crtc() {
> > >   /*
> > >* Peeking at the current state is safe since
> > >* we can only get here while holding the
> > >* connection_mutex.
> > >*/
> > >   crtc_state = intel_get_existing_crtc_state();
> > >   if (!crtc_state)
> > >   crtc_state = to_intel_crtc_state(crtc->base.state);
> > > 
> > >   ...
> > > }
> > > 
> > > The other option would be to track the min cdclk for each pipe in the
> > > top level state I suppose. We already do that for the pixel rate
> > > actually so that we can calculate the cdclk to begin with. Hmm. Maybe
> > > we should just switch to tracking the min cdclk per pipe instead of the
> > > > pixel rate. Or did we need the pixel rate itself for something else,
> > > Maarten?
> > > 
> > > Or we could perhaps replace the pixel rate/pixclk tracking with the peek
> > > approach entirely. Not quite sure. Would have to read the entire thing
> > > through.
> > > 
> > 
> > I thought of this, but the work around applies for only three platforms
> > (potentially just two) as of now. Does it warrant a driver wide change?
> > I have to check if mincdclk is useful elsewhere.
> 
> We need to do one of these options. No way around it if we need this
> w/a. Though I guess it's a little bit of an open question at the moment
> since on SKL it only supposedly applies up to D stepping which we don't
> care about. On BDW it seems to be for everything though.
> 

Yeah, you are right. 

I have a different workaround for SKL that does not involve cdclk.
Setting CHICKEN_TRANS_x bit 13 fixes the 

Re: [Intel-gfx] [PATCH 8/8] drm/i915/gen9+: Preserve old allocation from crtc_state.

2016-10-20 Thread Matt Roper
On Wed, Oct 12, 2016 at 03:28:21PM +0200, Maarten Lankhorst wrote:
> This is the last bit required for making nonblocking modesets work
> correctly. The state in intel_crtc->hw_ddb is not updated until
> somewhere in atomic commit, while the previous crtc state should be
> accurate if the ddb hasn't changed.
> 
> Signed-off-by: Maarten Lankhorst 

Can we get rid of hw_ddb completely and always pull from old state?  It
looks like the only other place it's still used is
skl_update_crtcs() -> skl_ddb_allocation_overlaps().


Matt

> ---
>  drivers/gpu/drm/i915/intel_display.c | 2 +-
>  drivers/gpu/drm/i915/intel_pm.c  | 6 +-
>  2 files changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c 
> b/drivers/gpu/drm/i915/intel_display.c
> index d3d7d9dc14a8..93e16da0aa51 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -14332,7 +14332,7 @@ static void skl_update_crtcs(struct drm_atomic_state 
> *state,
>* new ddb allocation to take effect.
>*/
>   if (!skl_ddb_entry_equal(&cstate->wm.skl.ddb,
> -  &intel_crtc->hw_ddb) &&
> +  
> &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb) &&
>   !crtc->state->active_changed &&
>   intel_state->wm_results.dirty_pipes != updated)
>   vbl_wait = true;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 18c62d1eea19..182e6b30b60a 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3091,7 +3091,11 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device 
> *dev,
>* we currently hold.
>*/
>   if (!intel_state->active_pipe_changes) {
> - *alloc = to_intel_crtc(for_crtc)->hw_ddb;
> + /*
> +  * alloc may be cleared by clear_intel_crtc_state,
> +  * copy from old state to be sure
> +  */
> + *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
>   return;
>   }
>  
> -- 
> 2.7.4
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Matt Roper
Graphics Software Engineer
IoTG Platform Enabling & Development
Intel Corporation
(916) 356-2795
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 06/11] drm/i915: Enable i915 perf stream for Haswell OA unit

2016-10-20 Thread Chris Wilson
On Thu, Oct 20, 2016 at 10:19:05PM +0100, Robert Bragg wrote:
> +int i915_gem_context_pin_legacy_rcs_state(struct drm_i915_private *dev_priv,
> +   struct i915_gem_context *ctx,
> +   u64 flags)

This is still no.

> +static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
> +{
> + struct drm_i915_gem_object *bo;
> + enum i915_map_type map;
> + struct i915_vma *vma;
> + int ret;
> +
> + BUG_ON(dev_priv->perf.oa.oa_buffer.obj);
> +
> + ret = i915_mutex_lock_interruptible(&dev_priv->drm);
> + if (ret)
> + return ret;
> +
> + BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
> + BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
> +
> + bo = i915_gem_object_create(&dev_priv->drm, OA_BUFFER_SIZE);
> + if (IS_ERR(bo)) {
> + DRM_ERROR("Failed to allocate OA buffer\n");
> + ret = PTR_ERR(bo);
> + goto unlock;
> + }
> + dev_priv->perf.oa.oa_buffer.obj = bo;
> +
> + ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> + if (ret)
> + goto err_unref;
> +
> + /* PreHSW required 512K alignment, HSW requires 16M */
> + vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, PIN_MAPPABLE);
> + if (IS_ERR(vma)) {
> + ret = PTR_ERR(vma);
> + goto err_unref;
> + }
> + dev_priv->perf.oa.oa_buffer.vma = vma;
> +
> + map = HAS_LLC(dev_priv) ? I915_MAP_WB : I915_MAP_WC;

You set the hw up to do coherent writes into the CPU cache, and then you
request WC access to the pages? With set_cache_level(LLC) you can use
MAP_WB on both llc and snoop based architectures. Fortunately this is
only HSW!

> + dev_priv->perf.oa.oa_buffer.gtt_offset = i915_ggtt_offset(vma);

I haven't spotted the advantage of storing both the ggtt_offset in
addition to the vma (or the bo as well as the vma).

> + dev_priv->perf.oa.oa_buffer.addr = i915_gem_object_pin_map(bo, map);
> + if (IS_ERR(dev_priv->perf.oa.oa_buffer.addr)) {
> + ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.addr);
> + goto err_unpin;
> + }

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 7/8] drm/i915/gen9+: Program watermarks as a separate step during evasion

2016-10-20 Thread Matt Roper
On Wed, Oct 12, 2016 at 03:28:20PM +0200, Maarten Lankhorst wrote:
> Instead of running the watermark updates from the callbacks run
> them from a separate hook atomic_evade_watermarks.

The commit message here is a bit terse.  I'd clarify that the change
we're making is that watermark register programming is no longer
happening in the same display callbacks that write general plane
registers, but rather in a new independent hook.  The key thing to
emphasize is that despite the refactoring, the watermark values will
still be written under the same vblank evasion that is covering the rest
of the planes' updates, so they'll still take effect on the same vblank.

> 
> This also gets rid of the global skl_results, which was required for
> keeping track of the current atomic commit.
> 
> Signed-off-by: Maarten Lankhorst 
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  7 ---
>  drivers/gpu/drm/i915/intel_display.c | 36 +-
>  drivers/gpu/drm/i915/intel_drv.h |  7 ---
>  drivers/gpu/drm/i915/intel_pm.c  | 38 
> ++--
>  drivers/gpu/drm/i915/intel_sprite.c  | 18 -
>  5 files changed, 28 insertions(+), 78 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 09588c58148f..28e44cb611b8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2027,13 +2027,6 @@ struct drm_i915_private {
>*/
>   uint16_t skl_latency[8];
>  
> - /*
> -  * The skl_wm_values structure is a bit too big for stack
> -  * allocation, so we keep the staging struct where we store
> -  * intermediate results here instead.
> -  */
> - struct skl_wm_values skl_results;
> -
>   /* current hardware state */
>   union {
>   struct ilk_wm_values hw;
> diff --git a/drivers/gpu/drm/i915/intel_display.c 
> b/drivers/gpu/drm/i915/intel_display.c
> index 340861826c46..d3d7d9dc14a8 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -3377,9 +3377,6 @@ static void skylake_update_primary_plane(struct 
> drm_plane *plane,
>   struct drm_i915_private *dev_priv = to_i915(dev);
>   struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
>   struct drm_framebuffer *fb = plane_state->base.fb;
> - const struct skl_wm_values *wm = _priv->wm.skl_results;
> - const struct skl_plane_wm *p_wm =
> - _state->wm.skl.optimal.planes[0];
>   int pipe = intel_crtc->pipe;
>   u32 plane_ctl;
>   unsigned int rotation = plane_state->base.rotation;
> @@ -3415,9 +3412,6 @@ static void skylake_update_primary_plane(struct 
> drm_plane *plane,
>   intel_crtc->adjusted_x = src_x;
>   intel_crtc->adjusted_y = src_y;
>  
> - if (wm->dirty_pipes & drm_crtc_mask(_crtc->base))
> - skl_write_plane_wm(intel_crtc, p_wm, >ddb, 0);
> -
>   I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
>   I915_WRITE(PLANE_OFFSET(pipe, 0), (src_y << 16) | src_x);
>   I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
> @@ -3450,18 +3444,8 @@ static void skylake_disable_primary_plane(struct 
> drm_plane *primary,
>   struct drm_device *dev = crtc->dev;
>   struct drm_i915_private *dev_priv = to_i915(dev);
>   struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> - struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
> - const struct skl_plane_wm *p_wm = >wm.skl.optimal.planes[0];
>   int pipe = intel_crtc->pipe;
>  
> - /*
> -  * We only populate skl_results on watermark updates, and if the
> -  * plane's visiblity isn't actually changing neither is its watermarks.
> -  */
> - if (!crtc->primary->state->visible)
> - skl_write_plane_wm(intel_crtc, p_wm,
> -_priv->wm.skl_results.ddb, 0);
> -
>   I915_WRITE(PLANE_CTL(pipe, 0), 0);
>   I915_WRITE(PLANE_SURF(pipe, 0), 0);
>   POSTING_READ(PLANE_SURF(pipe, 0));
> @@ -10824,16 +10808,9 @@ static void i9xx_update_cursor(struct drm_crtc 
> *crtc, u32 base,
>   struct drm_device *dev = crtc->dev;
>   struct drm_i915_private *dev_priv = to_i915(dev);
>   struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> - struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
> - const struct skl_wm_values *wm = _priv->wm.skl_results;
> - const struct skl_plane_wm *p_wm =
> - >wm.skl.optimal.planes[PLANE_CURSOR];
>   int pipe = intel_crtc->pipe;
>   uint32_t cntl = 0;
>  
> - if (INTEL_GEN(dev_priv) >= 9 && wm->dirty_pipes & drm_crtc_mask(crtc))
> - skl_write_cursor_wm(intel_crtc, p_wm, >ddb);
> -
>   if (plane_state && plane_state->base.visible) {
>   cntl = MCURSOR_GAMMA_ENABLE;
>   

Re: [Intel-gfx] [PATCH v3 3/4] drm/i915: Clean up DDI DDC/AUX CH sanitation

2016-10-20 Thread Maarten Maathuis
I meant DON'T suspect

On Thu, Oct 20, 2016 at 11:53 PM, Maarten Maathuis 
wrote:

> Also tested v3 on top of 4.8.3 (mainline git is a mess right now for
> booting).
>
> I did encounter a seemingly unrelated message during boot (including a
> WARN_ON):
> [drm:skylake_pfit_enable [i915]] *ERROR* Requesting pfit without getting a
> scaler first
>
> I suspect any causal relation with these patches.
>
> On Mon, Oct 17, 2016 at 8:07 PM,  wrote:
>
>> From: Ville Syrjälä 
>>
>> Now that we use the AUX and GMBUS assignment from VBT for all ports,
>> let's clean up the sanitization of the port information a bit.
>> Previously we only did this for port E, and only complained about a
>> non-standard assignment for the other ports. But as we know that
>> non-standard assignments are a fact of life, let's expand the
>> sanitization to all the ports.
>>
>> v2: Include a commit message, fix up the comments a bit
>> v3: Don't clobber other ports if the current port has no alternate aux
>> ch/ddc pin
>>
>> Cc: sta...@vger.kernel.org
>> Cc: Maarten Maathuis 
>> Tested-by: Maarten Maathuis 
>> References: https://bugs.freedesktop.org/show_bug.cgi?id=97877
>> Signed-off-by: Ville Syrjälä 
>> Link: http://patchwork.freedesktop.org/patch/msgid/1476208368-5710
>> -4-git-send-email-ville.syrj...@linux.intel.com
>> Reviewed-by: Jim Bride  (v2)
>> ---
>>  drivers/gpu/drm/i915/intel_bios.c | 122 --
>> 
>>  1 file changed, 77 insertions(+), 45 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_bios.c
>> b/drivers/gpu/drm/i915/intel_bios.c
>> index 83667e8cdd6b..a8ff8c099685 100644
>> --- a/drivers/gpu/drm/i915/intel_bios.c
>> +++ b/drivers/gpu/drm/i915/intel_bios.c
>> @@ -1035,6 +1035,77 @@ static u8 translate_iboost(u8 val)
>> return mapping[val];
>>  }
>>
>> +static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
>> +enum port port)
>> +{
>> +   const struct ddi_vbt_port_info *info =
>> +   _priv->vbt.ddi_port_info[port];
>> +   enum port p;
>> +
>> +   if (!info->alternate_ddc_pin)
>> +   return;
>> +
>> +   for_each_port_masked(p, (1 << port) - 1) {
>> +   struct ddi_vbt_port_info *i =
>> _priv->vbt.ddi_port_info[p];
>> +
>> +   if (info->alternate_ddc_pin != i->alternate_ddc_pin)
>> +   continue;
>> +
>> +   DRM_DEBUG_KMS("port %c trying to use the same DDC pin
>> (0x%x) as port %c, "
>> + "disabling port %c DVI/HDMI support\n",
>> + port_name(p), i->alternate_ddc_pin,
>> + port_name(port), port_name(p));
>> +
>> +   /*
>> +* If we have multiple ports supposedly sharing the
>> +* pin, then dvi/hdmi couldn't exist on the shared
>> +* port. Otherwise they share the same ddc bin and
>> +* system couldn't communicate with them separately.
>> +*
>> +* Due to parsing the ports in alphabetical order,
>> +* a higher port will always clobber a lower one.
>> +*/
>> +   i->supports_dvi = false;
>> +   i->supports_hdmi = false;
>> +   i->alternate_ddc_pin = 0;
>> +   }
>> +}
>> +
>> +static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
>> +   enum port port)
>> +{
>> +   const struct ddi_vbt_port_info *info =
>> +   _priv->vbt.ddi_port_info[port];
>> +   enum port p;
>> +
>> +   if (!info->alternate_aux_channel)
>> +   return;
>> +
>> +   for_each_port_masked(p, (1 << port) - 1) {
>> +   struct ddi_vbt_port_info *i =
>> _priv->vbt.ddi_port_info[p];
>> +
>> +   if (info->alternate_aux_channel !=
>> i->alternate_aux_channel)
>> +   continue;
>> +
>> +   DRM_DEBUG_KMS("port %c trying to use the same AUX CH
>> (0x%x) as port %c, "
>> + "disabling port %c DP support\n",
>> + port_name(p), i->alternate_aux_channel,
>> + port_name(port), port_name(p));
>> +
>> +   /*
>> +* If we have multiple ports supposedly sharing the
>> +* aux channel, then DP couldn't exist on the shared
>> +* port. Otherwise they share the same aux channel
>> +* and system couldn't communicate with them separately.
>> +*
>> +* Due to parsing the ports in alphabetical order,
>> +* a higher port will always clobber a lower one.
>> +*/
>> +   i->supports_dp = false;
>> +   

Re: [Intel-gfx] [PATCH v3 3/4] drm/i915: Clean up DDI DDC/AUX CH sanitation

2016-10-20 Thread Maarten Maathuis
Also tested v3 on top of 4.8.3 (mainline git is a mess right now for
booting).

I did encounter a seemingly unrelated message during boot (including a
WARN_ON):
[drm:skylake_pfit_enable [i915]] *ERROR* Requesting pfit without getting a
scaler first

I suspect any causal relation with these patches.

On Mon, Oct 17, 2016 at 8:07 PM,  wrote:

> From: Ville Syrjälä 
>
> Now that we use the AUX and GMBUS assignment from VBT for all ports,
> let's clean up the sanitization of the port information a bit.
> Previously we only did this for port E, and only complained about a
> non-standard assignment for the other ports. But as we know that
> non-standard assignments are a fact of life, let's expand the
> sanitization to all the ports.
>
> v2: Include a commit message, fix up the comments a bit
> v3: Don't clobber other ports if the current port has no alternate aux
> ch/ddc pin
>
> Cc: sta...@vger.kernel.org
> Cc: Maarten Maathuis 
> Tested-by: Maarten Maathuis 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=97877
> Signed-off-by: Ville Syrjälä 
> Link: http://patchwork.freedesktop.org/patch/msgid/1476208368-
> 5710-4-git-send-email-ville.syrj...@linux.intel.com
> Reviewed-by: Jim Bride  (v2)
> ---
>  drivers/gpu/drm/i915/intel_bios.c | 122 --
> 
>  1 file changed, 77 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_bios.c
> b/drivers/gpu/drm/i915/intel_bios.c
> index 83667e8cdd6b..a8ff8c099685 100644
> --- a/drivers/gpu/drm/i915/intel_bios.c
> +++ b/drivers/gpu/drm/i915/intel_bios.c
> @@ -1035,6 +1035,77 @@ static u8 translate_iboost(u8 val)
> return mapping[val];
>  }
>
> +static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
> +enum port port)
> +{
> +   const struct ddi_vbt_port_info *info =
> +   _priv->vbt.ddi_port_info[port];
> +   enum port p;
> +
> +   if (!info->alternate_ddc_pin)
> +   return;
> +
> +   for_each_port_masked(p, (1 << port) - 1) {
> +   struct ddi_vbt_port_info *i = _priv->vbt.ddi_port_info[
> p];
> +
> +   if (info->alternate_ddc_pin != i->alternate_ddc_pin)
> +   continue;
> +
> +   DRM_DEBUG_KMS("port %c trying to use the same DDC pin
> (0x%x) as port %c, "
> + "disabling port %c DVI/HDMI support\n",
> + port_name(p), i->alternate_ddc_pin,
> + port_name(port), port_name(p));
> +
> +   /*
> +* If we have multiple ports supposedly sharing the
> +* pin, then dvi/hdmi couldn't exist on the shared
> +* port. Otherwise they share the same ddc bin and
> +* system couldn't communicate with them separately.
> +*
> +* Due to parsing the ports in alphabetical order,
> +* a higher port will always clobber a lower one.
> +*/
> +   i->supports_dvi = false;
> +   i->supports_hdmi = false;
> +   i->alternate_ddc_pin = 0;
> +   }
> +}
> +
> +static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
> +   enum port port)
> +{
> +   const struct ddi_vbt_port_info *info =
> +   _priv->vbt.ddi_port_info[port];
> +   enum port p;
> +
> +   if (!info->alternate_aux_channel)
> +   return;
> +
> +   for_each_port_masked(p, (1 << port) - 1) {
> +   struct ddi_vbt_port_info *i = _priv->vbt.ddi_port_info[
> p];
> +
> +   if (info->alternate_aux_channel !=
> i->alternate_aux_channel)
> +   continue;
> +
> +   DRM_DEBUG_KMS("port %c trying to use the same AUX CH
> (0x%x) as port %c, "
> + "disabling port %c DP support\n",
> + port_name(p), i->alternate_aux_channel,
> + port_name(port), port_name(p));
> +
> +   /*
> +* If we have multiple ports supposedly sharing the
> +* aux channel, then DP couldn't exist on the shared
> +* port. Otherwise they share the same aux channel
> +* and system couldn't communicate with them separately.
> +*
> +* Due to parsing the ports in alphabetical order,
> +* a higher port will always clobber a lower one.
> +*/
> +   i->supports_dp = false;
> +   i->alternate_aux_channel = 0;
> +   }
> +}
> +
>  static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port
> port,
>const struct bdb_header *bdb)
>  {
> @@ -1109,54 +1180,15 @@ static void 

[Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [v6,01/11] drm/i915: Add i915 perf infrastructure

2016-10-20 Thread Patchwork
== Series Details ==

Series: series starting with [v6,01/11] drm/i915: Add i915 perf infrastructure
URL   : https://patchwork.freedesktop.org/series/14135/
State : failure

== Summary ==

Series 14135v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/14135/revisions/1/mbox/

Test drv_module_reload_basic:
dmesg-warn -> PASS   (fi-skl-6700hq)
skip   -> PASS   (fi-skl-6770hq)
Test gem_exec_parse:
Subgroup basic-rejected:
pass   -> FAIL   (fi-ivb-3770)
pass   -> FAIL   (fi-byt-j1900)
pass   -> FAIL   (fi-byt-n2820)
pass   -> FAIL   (fi-hsw-4770)
pass   -> FAIL   (fi-hsw-4770r)
pass   -> FAIL   (fi-ivb-3520m)
Test gem_exec_suspend:
Subgroup basic-s3:
pass   -> DMESG-WARN (fi-skl-6700hq)
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-a:
pass   -> DMESG-WARN (fi-skl-6700hq)
pass   -> FAIL   (fi-skl-6700k)
Subgroup suspend-read-crc-pipe-b:
pass   -> DMESG-WARN (fi-skl-6700hq)
Subgroup suspend-read-crc-pipe-c:
pass   -> DMESG-WARN (fi-skl-6700hq)

fi-bdw-5557u total:246  pass:231  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:246  pass:204  dwarn:0   dfail:0   fail:0   skip:42 
fi-bxt-t5700 total:246  pass:216  dwarn:0   dfail:0   fail:0   skip:30 
fi-byt-j1900 total:246  pass:214  dwarn:0   dfail:0   fail:1   skip:31 
fi-byt-n2820 total:246  pass:210  dwarn:0   dfail:0   fail:1   skip:35 
fi-hsw-4770  total:246  pass:223  dwarn:0   dfail:0   fail:1   skip:22 
fi-hsw-4770r total:246  pass:223  dwarn:0   dfail:0   fail:1   skip:22 
fi-ilk-650   total:246  pass:185  dwarn:0   dfail:0   fail:1   skip:60 
fi-ivb-3520m total:246  pass:220  dwarn:0   dfail:0   fail:1   skip:25 
fi-ivb-3770  total:246  pass:220  dwarn:0   dfail:0   fail:1   skip:25 
fi-kbl-7200u total:246  pass:222  dwarn:0   dfail:0   fail:0   skip:24 
fi-skl-6260u total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hqtotal:246  pass:219  dwarn:4   dfail:0   fail:0   skip:23 
fi-skl-6700k total:246  pass:220  dwarn:1   dfail:0   fail:1   skip:24 
fi-skl-6770hqtotal:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:246  pass:210  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600  total:246  pass:209  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2779/

5113d7495dab3ea4d14a7698368c6be80f6c045c drm-intel-nightly: 
2016y-10m-20d-13h-31m-16s UTC integration manifest
1e4473e drm/i915: Add a kerneldoc summary for i915_perf.c
0c023de drm/i915: Add more Haswell OA metric sets
d3bea6a drm/i915: add oa_event_min_timer_exponent sysctl
42e8dc5 drm/i915: Add dev.i915.perf_stream_paranoid sysctl option
8fd324f drm/i915: advertise available metrics via sysfs
86a39c6 drm/i915: Enable i915 perf stream for Haswell OA unit
3838098 drm/i915: Add 'render basic' Haswell OA unit config
77b1c48 drm/i915: don't whitelist oacontrol in cmd parser
18aba00 drm/i915: return EACCES for check_cmd() failures
de403c1 drm/i915: rename OACONTROL GEN7_OACONTROL
4710663 drm/i915: Add i915 perf infrastructure

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

2016-10-20 Thread Imre Deak
On Thu, 2016-10-20 at 23:50 +0300, Jani Nikula wrote:
> On Thu, 20 Oct 2016, Imre Deak  wrote:
> > On Thu, 2016-10-20 at 22:24 +0300, Jani Nikula wrote:
> > > On Thu, 20 Oct 2016, Jani Nikula  wrote:
> > > > On Thu, 20 Oct 2016, Imre Deak  wrote:
> > > > > On my APL the LSPCON firmware resumes in PCON mode as opposed to the
> > > > > expected LS mode. It also appears to be in a state where AUX DPCD 
> > > > > reads
> > > > > will succeed but return garbage recovering only after a few hundreds 
> > > > > of
> > > > > milliseconds. After the recovery time DPCD reads will result in the
> > > > > correct values and things will continue to work. If I2C over AUX is
> > > > > attempted during this recovery time (implying an AUX write 
> > > > > transaction)
> > > > > the firmware won't recover and will stay in this broken state.
> > > > > 
> > > > > As a workaround check if the firmware is in PCON state after resume 
> > > > > and
> > > > > if so wait until the correct DPCD values are returned. For this we
> > > > > compare the branch descriptor with the one we cached during init time.
> > > > > If the firmware was in the LS state, we skip the w/a and continue as
> > > > > before.
> > > > > 
> > > > > Cc: Shashank Sharma 
> > > > > Cc: Ville Syrjälä 
> > > > > Cc: Jani Nikula 
> > > > > Signed-off-by: Imre Deak 
> > > > > ---
> > > > >  drivers/gpu/drm/i915/intel_dp.c |  2 +-
> > > > >  drivers/gpu/drm/i915/intel_drv.h|  6 -
> > > > >  drivers/gpu/drm/i915/intel_lspcon.c | 52 
> > > > > ++---
> > > > >  3 files changed, 48 insertions(+), 12 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c 
> > > > > b/drivers/gpu/drm/i915/intel_dp.c
> > > > > index e90211e..ec031db 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
> > > > >   intel_dp->DP = DP;
> > > > >  }
> > > > >  
> > > > > -static bool
> > > > > +bool
> > > > >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
> > > > >  {
> > > > > >   if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
> > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h 
> > > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > > index a35e241..9a2366e 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > > @@ -972,7 +972,9 @@ struct intel_dp {
> > > > >  struct intel_lspcon {
> > > > >   bool active;
> > > > >   enum drm_lspcon_mode mode;
> > > > > - struct drm_dp_aux *aux;
> > > > > + struct intel_dp *intel_dp;
> > > > > + bool desc_valid;
> > > > > + struct intel_dp_desc desc;
> > > > 
> > > > I guess we could cache the desc in intel_dp directly. Independent of
> > > > this patch.
> > > > 
> > > > Also, I'm wondering if we could stick with just aux here, and read
> > > > something else from dpcd instead.
> > > > 
> > > > >  };
> > > > >  
> > > > >  struct intel_digital_port {
> > > > > @@ -1469,6 +1471,8 @@ static inline unsigned int 
> > > > > intel_dp_unused_lane_mask(int lane_count)
> > > > >  }
> > > > >  
> > > > >  bool
> > > > > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> > > > > +bool
> > > > >  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc 
> > > > > *desc);
> > > > >  void
> > > > >  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc 
> > > > > *desc);
> > > > > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c 
> > > > > b/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > index d2c8cb2..54c6173 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > @@ -30,7 +30,7 @@
> > > > >  static enum drm_lspcon_mode lspcon_get_current_mode(struct 
> > > > > intel_lspcon *lspcon)
> > > > >  {
> > > > >   enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
> > > > > > - struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > > > + struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > > > >  
> > > > > >   if (drm_lspcon_get_mode(adapter, &current_mode))
> > > > >   DRM_ERROR("Error reading LSPCON mode\n");
> > > > > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon 
> > > > > *lspcon,
> > > > >  {
> > > > >   int err;
> > > > >   enum drm_lspcon_mode current_mode;
> > > > > > - struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > > > + struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > > > >  
> > > > > >   err = drm_lspcon_get_mode(adapter, &current_mode);
> > > > >   if (err) {
> > > > > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon 
> > > > > *lspcon,
> > > > >  static bool lspcon_probe(struct intel_lspcon *lspcon)
> > > > >  {
> > > > >   enum 

Re: [Intel-gfx] [PATCH i-g-t 3/3] tests/kms_sysfs_edid_timing: Convert sh script to C version.

2016-10-20 Thread Chris Wilson
On Thu, Oct 20, 2016 at 10:36:49PM +0300, Marius Vlad wrote:
> While at it, make available time macros to other users.
> 
> Signed-off-by: Marius Vlad 
> ---
>  benchmarks/gem_syslatency.c   |  4 ---
>  lib/igt_core.c|  3 --
>  lib/igt_core.h|  3 ++
>  tests/Makefile.sources|  2 +-
>  tests/drv_hangman.c   |  1 -
>  tests/gem_wait.c  |  4 ---
>  tests/kms_flip.c  |  3 --
>  tests/kms_sysfs_edid_timing   | 25 -
>  tests/kms_sysfs_edid_timing.c | 82 
> +++
>  9 files changed, 86 insertions(+), 41 deletions(-)
>  delete mode 100755 tests/kms_sysfs_edid_timing
>  create mode 100644 tests/kms_sysfs_edid_timing.c
> 
> diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
> index 6cad3a0..83bfac7 100644
> --- a/benchmarks/gem_syslatency.c
> +++ b/benchmarks/gem_syslatency.c
> @@ -133,10 +133,6 @@ static void *gem_busyspin(void *arg)
>   return NULL;
>  }
>  
> -#define MSEC_PER_SEC (1000)
> -#define USEC_PER_SEC (1000 * MSEC_PER_SEC)
> -#define NSEC_PER_SEC (1000 * USEC_PER_SEC)
> -
>  static double elapsed(const struct timespec *a, const struct timespec *b)
>  {
>   return 1e9*(b->tv_sec - a->tv_sec) + (b->tv_nsec - a ->tv_nsec);
> diff --git a/lib/igt_core.c b/lib/igt_core.c
> index 9cd5f98..f64c809 100644
> --- a/lib/igt_core.c
> +++ b/lib/igt_core.c
> @@ -398,9 +398,6 @@ error:
>   return -errno;
>  }
>  
> -#define MSEC_PER_SEC (1000)
> -#define USEC_PER_SEC (1000*MSEC_PER_SEC)
> -#define NSEC_PER_SEC (1000*USEC_PER_SEC)
>  uint64_t igt_nsec_elapsed(struct timespec *start)
>  {
>   struct timespec now;
> diff --git a/lib/igt_core.h b/lib/igt_core.h
> index 03be757..a45e334 100644
> --- a/lib/igt_core.h
> +++ b/lib/igt_core.h
> @@ -847,6 +847,9 @@ extern enum igt_log_level igt_log_level;
>  void igt_set_timeout(unsigned int seconds,
>const char *op);
>  
> +#define MSEC_PER_SEC (1000)
> +#define USEC_PER_SEC (1000*MSEC_PER_SEC)
> +#define NSEC_PER_SEC (1000*USEC_PER_SEC)
>  /**
>   * igt_nsec_elapsed:
>   * @start: measure from this point in time
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index c35ea11..969ef0b 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -210,6 +210,7 @@ TESTS_progs = \
>   kms_mmap_write_crc \
>   kms_pwrite_crc \
>   kms_sink_crc_basic \
> + kms_sysfs_edid_timing \
>   prime_udl \
>   drv_module_reload_basic \
>   $(NULL)
> @@ -222,7 +223,6 @@ TESTS_scripts_M = \
>  TESTS_scripts = \
>   debugfs_emon_crash \
>   drv_debugfs_reader \
> - kms_sysfs_edid_timing \
>   sysfs_l3_parity \
>   test_rte_check \
>   tools_test \
> diff --git a/tests/drv_hangman.c b/tests/drv_hangman.c
> index 953a4c6..19d809c 100644
> --- a/tests/drv_hangman.c
> +++ b/tests/drv_hangman.c
> @@ -282,7 +282,6 @@ static void test_error_state_capture(unsigned ring_id,
>   * case and it takes a lot more time to wrap, so the acthd can potentially 
> keep
>   * increasing for a long time
>   */
> -#define NSEC_PER_SEC 1000000000LL
>  static void hangcheck_unterminated(void)
>  {
>   int fd;
> diff --git a/tests/gem_wait.c b/tests/gem_wait.c
> index b4127de..db04958 100644
> --- a/tests/gem_wait.c
> +++ b/tests/gem_wait.c
> @@ -83,10 +83,6 @@ static void sigiter(int sig, siginfo_t *info, void *arg)
>   __sync_synchronize();
>  }
>  
> -#define MSEC_PER_SEC (1000)
> -#define USEC_PER_SEC (1000 * MSEC_PER_SEC)
> -#define NSEC_PER_SEC (1000 * USEC_PER_SEC)
> -
>  #define BUSY 1
>  #define HANG 2
>  static void basic(int fd, unsigned engine, unsigned flags)
> diff --git a/tests/kms_flip.c b/tests/kms_flip.c
> index 7646aaf..842bc3a 100644
> --- a/tests/kms_flip.c
> +++ b/tests/kms_flip.c
> @@ -83,9 +83,6 @@
>  #define DRM_CAP_TIMESTAMP_MONOTONIC 6
>  #endif
>  
> -#define USEC_PER_SEC 1000000L
> -#define NSEC_PER_SEC 1000000000L
> -
>  drmModeRes *resources;
>  int drm_fd;
>  static drm_intel_bufmgr *bufmgr;
> diff --git a/tests/kms_sysfs_edid_timing b/tests/kms_sysfs_edid_timing
> deleted file mode 100755
> index 46ea540..000
> --- a/tests/kms_sysfs_edid_timing
> +++ /dev/null
> @@ -1,25 +0,0 @@
> -#!/bin/bash
> -#
> -# This check the time we take to read the content of all the possible 
> connectors.
> -# Without the edid -ENXIO patch 
> (http://permalink.gmane.org/gmane.comp.video.dri.devel/62083),
> -# we sometimes take a *really* long time. So let's just check for some 
> reasonable timing here
> -#
> -
> -DRM_LIB_ALLOW_NO_MASTER=1
> -
> -SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
> -. $SOURCE_DIR/drm_lib.sh
> -
> -TIME1=$(date +%s%N)
> -cat $(find /sys/devices/|grep drm | grep /status) > /dev/null
> -TIME2=$(date +%s%N)
> -
> -# time in ms
> -RES=$(((TIME2 - TIME1) / 1000000))
> -
> -if [ $RES -gt 600 ]; then
> - echo "Talking to outputs took ${RES}ms, something is wrong"
> - exit $IGT_EXIT_FAILURE
> 

[Intel-gfx] [PATCH v6 11/11] drm/i915: Add a kerneldoc summary for i915_perf.c

2016-10-20 Thread Robert Bragg
In particular this tries to capture for posterity some of the early
challenges we had with using the core perf infrastructure in case we
ever want to revisit adapting perf for device metrics.

Cc: Chris Wilson 
Signed-off-by: Robert Bragg 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/i915_perf.c | 163 +++
 1 file changed, 163 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 4e985dd..1e29655 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -24,6 +24,169 @@
  *   Robert Bragg 
  */
 
+
+/**
+ * DOC: i915 Perf, streaming API for GPU metrics
+ *
+ * Gen graphics supports a large number of performance counters that can help
+ * driver and application developers understand and optimize their use of the
+ * GPU.
+ *
+ * This i915 perf interface enables userspace to configure and open a file
+ * descriptor representing a stream of GPU metrics which can then be read() as
+ * a stream of sample records.
+ *
+ * The interface is particularly suited to exposing buffered metrics that are
+ * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
+ *
+ * Streams representing a single context are accessible to applications with a
+ * corresponding drm file descriptor, such that OpenGL can use the interface
+ * without special privileges. Access to system-wide metrics requires root
+ * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
+ * sysctl option.
+ *
+ *
+ * The interface was initially inspired by the core Perf infrastructure but
+ * some notable differences are:
+ *
+ * i915 perf file descriptors represent a "stream" instead of an "event"; where
+ * a perf event primarily corresponds to a single 64bit value, while a stream
+ * might sample sets of tightly-coupled counters, depending on the
+ * configuration.  For example the Gen OA unit isn't designed to support
+ * orthogonal configurations of individual counters; it's configured for a set
+ * of related counters. Samples for an i915 perf stream capturing OA metrics
+ * will include a set of counter values packed in a compact HW specific format.
+ * The OA unit supports a number of different packing formats which can be
+ * selected by the user opening the stream. Perf has support for grouping
+ * events, but each event in the group is configured, validated and
+ * authenticated individually with separate system calls.
+ *
+ * i915 perf stream configurations are provided as an array of u64 (key,value)
+ * pairs, instead of a fixed struct with multiple miscellaneous config members,
+ * interleaved with event-type specific members.
+ *
+ * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
+ * The supported metrics are being written to memory by the GPU unsynchronized
+ * with the CPU, using HW specific packing formats for counter sets. Sometimes
+ * the constraints on HW configuration require reports to be filtered before it
+ * would be acceptable to expose them to unprivileged applications - to hide
+ * the metrics of other processes/contexts. For these use cases a read() based
+ * interface is a good fit, and provides an opportunity to filter data as it
+ * gets copied from the GPU mapped buffers to userspace buffers.
+ *
+ *
+ * Some notes regarding Linux Perf:
+ * 
+ *
+ * The first prototype of this driver was based on the core perf
+ * infrastructure, and while we did make that mostly work, with some changes to
+ * perf, we found we were breaking or working around too many assumptions baked
+ * into perf's currently cpu centric design.
+ *
+ * In the end we didn't see a clear benefit to making perf's implementation and
+ * interface more complex by changing design assumptions while we knew we still
+ * wouldn't be able to use any existing perf based userspace tools.
+ *
+ * Also considering the Gen specific nature of the Observability hardware and
+ * how userspace will sometimes need to combine i915 perf OA metrics with
+ * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
+ * expecting the interface to be used by a platform specific userspace such as
+ * OpenGL or tools. This is to say; we aren't inherently missing out on having
+ * a standard vendor/architecture agnostic interface by not using perf.
+ *
+ *
+ * For posterity, in case we might re-visit trying to adapt core perf to be
+ * better suited to exposing i915 metrics these were the main pain points we
+ * hit:
+ *
+ * - The perf based OA PMU driver broke some significant design assumptions:
+ *
+ *   Existing perf pmus are used for profiling work on a cpu and we were
+ *   introducing the idea of _IS_DEVICE pmus with different security
+ *   implications, the need to fake cpu-related data (such as user/kernel
+ *   registers) to fit with perf's current 

[Intel-gfx] [PATCH v6 07/11] drm/i915: advertise available metrics via sysfs

2016-10-20 Thread Robert Bragg
Each metric set is given a sysfs entry like:

/sys/class/drm/card0/metrics/<guid>/id

This allows userspace to enumerate the specific sets that are available
for the current system. The 'id' file contains an unsigned integer that
can be used to open the associated metric set via
DRM_IOCTL_I915_PERF_OPEN. The <guid> is a globally unique ID for a
specific OA unit register configuration that can be reliably used by
userspace as a key to lookup corresponding counter meta data and
normalization equations.

The guid registry is currently maintained as part of gputop along with
the XML metric set descriptions and code generation scripts, ref:

 https://github.com/rib/gputop
 > gputop-data/guids.xml
 > scripts/update-guids.py
 > gputop-data/oa-*.xml
 > scripts/i915-perf-kernelgen.py

 $ make -C gputop-data -f Makefile.xml SYSFS=1 WHITELIST=RenderBasic

Signed-off-by: Robert Bragg 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/i915_drv.c|  5 
 drivers/gpu/drm/i915/i915_drv.h|  4 +++
 drivers/gpu/drm/i915/i915_oa_hsw.c | 51 +
 drivers/gpu/drm/i915/i915_oa_hsw.h |  4 +++
 drivers/gpu/drm/i915/i915_perf.c   | 52 ++
 5 files changed, 116 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5449579..3b6f586 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1115,6 +1115,9 @@ static void i915_driver_register(struct drm_i915_private 
*dev_priv)
if (drm_dev_register(dev, 0) == 0) {
i915_debugfs_register(dev_priv);
i915_setup_sysfs(dev_priv);
+
+   /* Depends on sysfs having been initialized */
+   i915_perf_register(dev_priv);
} else
DRM_ERROR("Failed to register driver for userspace access!\n");
 
@@ -1151,6 +1154,8 @@ static void i915_driver_unregister(struct 
drm_i915_private *dev_priv)
acpi_video_unregister();
intel_opregion_unregister(dev_priv);
 
+   i915_perf_unregister(dev_priv);
+
i915_teardown_sysfs(dev_priv);
i915_debugfs_unregister(dev_priv);
drm_dev_unregister(&dev_priv->drm);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b234412..3b86427 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2161,6 +2161,8 @@ struct drm_i915_private {
struct {
bool initialized;
 
+   struct kobject *metrics_kobj;
+
struct mutex lock;
struct list_head streams;
 
@@ -3752,6 +3754,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs 
*engine,
 /* i915_perf.c */
 extern void i915_perf_init(struct drm_i915_private *dev_priv);
 extern void i915_perf_fini(struct drm_i915_private *dev_priv);
+extern void i915_perf_register(struct drm_i915_private *dev_priv);
+extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
 
 /* i915_suspend.c */
 extern int i915_save_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.c 
b/drivers/gpu/drm/i915/i915_oa_hsw.c
index 8906380..19f272b 100644
--- a/drivers/gpu/drm/i915/i915_oa_hsw.c
+++ b/drivers/gpu/drm/i915/i915_oa_hsw.c
@@ -24,6 +24,8 @@
  *
  */
 
+#include 
+
 #include "i915_drv.h"
 #include "i915_oa_hsw.h"
 
@@ -142,3 +144,52 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private 
*dev_priv)
return -ENODEV;
}
 }
+
+static ssize_t
+show_render_basic_id(struct device *kdev, struct device_attribute *attr, char 
*buf)
+{
+   return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC);
+}
+
+static struct device_attribute dev_attr_render_basic_id = {
+   .attr = { .name = "id", .mode = S_IRUGO },
+   .show = show_render_basic_id,
+   .store = NULL,
+};
+
+static struct attribute *attrs_render_basic[] = {
+   &dev_attr_render_basic_id.attr,
+   NULL,
+};
+
+static struct attribute_group group_render_basic = {
+   .name = "403d8832-1a27-4aa6-a64e-f5389ce7b212",
+   .attrs =  attrs_render_basic,
+};
+
+int
+i915_perf_register_sysfs_hsw(struct drm_i915_private *dev_priv)
+{
+   int mux_len;
+   int ret = 0;
+
+   if (get_render_basic_mux_config(dev_priv, &mux_len)) {
+   ret = sysfs_create_group(dev_priv->perf.metrics_kobj, 
&group_render_basic);
+   if (ret)
+   goto error_render_basic;
+   }
+
+   return 0;
+
+error_render_basic:
+   return ret;
+}
+
+void
+i915_perf_unregister_sysfs_hsw(struct drm_i915_private *dev_priv)
+{
+   int mux_len;
+
+   if (get_render_basic_mux_config(dev_priv, &mux_len))
+   sysfs_remove_group(dev_priv->perf.metrics_kobj, 
&group_render_basic);
+}
diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.h 
b/drivers/gpu/drm/i915/i915_oa_hsw.h
index b618a1f..429a229 100644
--- a/drivers/gpu/drm/i915/i915_oa_hsw.h
+++ b/drivers/gpu/drm/i915/i915_oa_hsw.h

[Intel-gfx] [PATCH v6 08/11] drm/i915: Add dev.i915.perf_stream_paranoid sysctl option

2016-10-20 Thread Robert Bragg
Consistent with the kernel.perf_event_paranoid sysctl option that can
allow non-root users to access system wide cpu metrics, this can
optionally allow non-root users to access system wide OA counter metrics
from Gen graphics hardware.

Signed-off-by: Robert Bragg 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/i915_perf.c | 50 +++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3b86427..66629bc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2162,6 +2162,7 @@ struct drm_i915_private {
bool initialized;
 
struct kobject *metrics_kobj;
+   struct ctl_table_header *sysctl_header;
 
struct mutex lock;
struct list_head streams;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c45bba5..1d61731 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -64,6 +64,11 @@
 #define POLL_FREQUENCY 200
 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
 
+/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
+static int zero;
+static int one = 1;
+static u32 i915_perf_stream_paranoid = true;
+
 /* The maximum exponent the hardware accepts is 63 (essentially it selects one
  * of the 64bit timestamp bits to trigger reports from) but there's currently
  * no known use case for sampling as infrequently as once per 47 thousand 
years.
@@ -1206,7 +1211,13 @@ i915_perf_open_ioctl_locked(struct drm_i915_private 
*dev_priv,
}
}
 
-   if (!specific_ctx && !capable(CAP_SYS_ADMIN)) {
+   /* Similar to perf's kernel.perf_paranoid_cpu sysctl option
+* we check a dev.i915.perf_stream_paranoid sysctl option
+* to determine if it's ok to access system wide OA counters
+* without CAP_SYS_ADMIN privileges.
+*/
+   if (!specific_ctx &&
+   i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
DRM_ERROR("Insufficient privileges to open system-wide i915 
perf stream\n");
ret = -EACCES;
goto err_ctx;
@@ -1450,6 +1461,39 @@ void i915_perf_unregister(struct drm_i915_private 
*dev_priv)
dev_priv->perf.metrics_kobj = NULL;
 }
 
+static struct ctl_table oa_table[] = {
+   {
+.procname = "perf_stream_paranoid",
+.data = &i915_perf_stream_paranoid,
+.maxlen = sizeof(i915_perf_stream_paranoid),
+.mode = 0644,
+.proc_handler = proc_dointvec_minmax,
+.extra1 = &zero,
+.extra2 = &one,
+},
+   {}
+};
+
+static struct ctl_table i915_root[] = {
+   {
+.procname = "i915",
+.maxlen = 0,
+.mode = 0555,
+.child = oa_table,
+},
+   {}
+};
+
+static struct ctl_table dev_root[] = {
+   {
+.procname = "dev",
+.maxlen = 0,
+.mode = 0555,
+.child = i915_root,
+},
+   {}
+};
+
 void i915_perf_init(struct drm_i915_private *dev_priv)
 {
if (!IS_HASWELL(dev_priv))
@@ -1482,6 +1526,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
dev_priv->perf.oa.n_builtin_sets =
i915_oa_n_builtin_metric_sets_hsw;
 
+   dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
+
dev_priv->perf.initialized = true;
 }
 
@@ -1490,6 +1536,8 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
if (!dev_priv->perf.initialized)
return;
 
+   unregister_sysctl_table(dev_priv->perf.sysctl_header);
+
memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
dev_priv->perf.initialized = false;
 }
-- 
2.10.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 06/11] drm/i915: Enable i915 perf stream for Haswell OA unit

2016-10-20 Thread Robert Bragg
Gen graphics hardware can be set up to periodically write snapshots of
performance counters into a circular buffer via its Observation
Architecture and this patch exposes that capability to userspace via the
i915 perf interface.

v2:
   Make sure to initialize ->specific_ctx_id when opening, without
   relying on _pin_notify hook, in case ctx already pinned.

Cc: Chris Wilson 
Signed-off-by: Robert Bragg 
Signed-off-by: Zhenyu Wang 

factor out init_specific_ctx_id func
---
 drivers/gpu/drm/i915/i915_drv.h |   72 ++-
 drivers/gpu/drm/i915/i915_gem_context.c |   22 +-
 drivers/gpu/drm/i915/i915_perf.c| 1034 ++-
 drivers/gpu/drm/i915/i915_reg.h |  338 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |   11 +-
 include/uapi/drm/i915_drm.h |   70 ++-
 6 files changed, 1515 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 28f3f77..b234412 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1760,6 +1760,11 @@ struct intel_wm_config {
bool sprites_scaled;
 };
 
+struct i915_oa_format {
+   u32 format;
+   int size;
+};
+
 struct i915_oa_reg {
i915_reg_t addr;
u32 value;
@@ -1780,11 +1785,6 @@ struct i915_perf_stream_ops {
 */
void (*disable)(struct i915_perf_stream *stream);
 
-   /* Return: true if any i915 perf records are ready to read()
-* for this stream.
-*/
-   bool (*can_read)(struct i915_perf_stream *stream);
-
/* Call poll_wait, passing a wait queue that will be woken
 * once there is something ready to read() for the stream
 */
@@ -1794,9 +1794,7 @@ struct i915_perf_stream_ops {
 
/* For handling a blocking read, wait until there is something
 * to ready to read() for the stream. E.g. wait on the same
-* wait queue that would be passed to poll_wait() until
-* ->can_read() returns true (if its safe to call ->can_read()
-* without the i915 perf lock held).
+* wait queue that would be passed to poll_wait().
 */
int (*wait_unlocked)(struct i915_perf_stream *stream);
 
@@ -1836,11 +1834,28 @@ struct i915_perf_stream {
struct list_head link;
 
u32 sample_flags;
+   int sample_size;
 
struct i915_gem_context *ctx;
bool enabled;
 
-   struct i915_perf_stream_ops *ops;
+   const struct i915_perf_stream_ops *ops;
+};
+
+struct i915_oa_ops {
+   void (*init_oa_buffer)(struct drm_i915_private *dev_priv);
+   int (*enable_metric_set)(struct drm_i915_private *dev_priv);
+   void (*disable_metric_set)(struct drm_i915_private *dev_priv);
+   void (*oa_enable)(struct drm_i915_private *dev_priv);
+   void (*oa_disable)(struct drm_i915_private *dev_priv);
+   void (*update_oacontrol)(struct drm_i915_private *dev_priv);
+   void (*update_hw_ctx_id_locked)(struct drm_i915_private *dev_priv,
+   u32 ctx_id);
+   int (*read)(struct i915_perf_stream *stream,
+   char __user *buf,
+   size_t count,
+   size_t *offset);
+   bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
 };
 
 struct drm_i915_private {
@@ -2145,16 +2160,48 @@ struct drm_i915_private {
 
struct {
bool initialized;
+
struct mutex lock;
struct list_head streams;
 
+   spinlock_t hook_lock;
+
struct {
-   u32 metrics_set;
+   struct i915_perf_stream *exclusive_stream;
+
+   u32 specific_ctx_id;
+
+   struct hrtimer poll_check_timer;
+   wait_queue_head_t poll_wq;
+   atomic_t pollin;
+
+   bool periodic;
+   int period_exponent;
+   int timestamp_frequency;
+
+   int tail_margin;
+
+   int metrics_set;
 
const struct i915_oa_reg *mux_regs;
int mux_regs_len;
const struct i915_oa_reg *b_counter_regs;
int b_counter_regs_len;
+
+   struct {
+   struct drm_i915_gem_object *obj;
+   struct i915_vma *vma;
+   u32 gtt_offset;
+   u8 *addr;
+   int format;
+   int format_size;
+   } oa_buffer;
+
+   u32 gen7_latched_oastatus1;
+
+   struct i915_oa_ops ops;
+   const struct i915_oa_format *oa_formats;
+   int n_builtin_sets;
   

[Intel-gfx] [PATCH v6 09/11] drm/i915: add oa_event_min_timer_exponent sysctl

2016-10-20 Thread Robert Bragg
The minimal sampling period is now configurable via a
dev.i915.oa_min_timer_exponent sysctl parameter.

Following the precedent set by perf, the default is the minimum that
won't (on its own) exceed the kernel.perf_event_max_sample_rate
default of 100000 samples/s.

Signed-off-by: Robert Bragg 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/i915_perf.c | 41 
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1d61731..4e985dd 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -82,6 +82,22 @@ static u32 i915_perf_stream_paranoid = true;
 #define INVALID_CTX_ID 0x
 
 
+/* for sysctl proc_dointvec_minmax of i915_oa_min_timer_exponent */
+static int oa_exponent_max = OA_EXPONENT_MAX;
+
+/* Theoretically we can program the OA unit to sample every 160ns but don't
+ * allow that by default unless root...
+ *
+ * The period is derived from the exponent as:
+ *
+ *   period = 80ns * 2^(exponent + 1)
+ *
+ * Referring to perf's kernel.perf_event_max_sample_rate for a precedent
+ * (100000 by default); with an OA exponent of 6 we get a period of 10.240
+ * microseconds - just under 100000Hz
+ */
+static u32 i915_oa_min_timer_exponent = 6;
+
 /* XXX: beware if future OA HW adds new report formats that the current
  * code assumes all reports have a power-of-two size and ~(size - 1) can
  * be used as a mask to align the OA tail pointer.
@@ -1349,21 +1365,13 @@ static int read_properties_unlocked(struct 
drm_i915_private *dev_priv,
return -EINVAL;
}
 
-   /* NB: The exponent represents a period as follows:
-*
-*   80ns * 2^(period_exponent + 1)
-*
-* Theoretically we can program the OA unit to sample
+   /* Theoretically we can program the OA unit to sample
 * every 160ns but don't allow that by default unless
 * root.
-*
-* Referring to perf's
-* kernel.perf_event_max_sample_rate for a precedent
-* (100000 by default); with an OA exponent of 6 we get
-* a period of 10.240 microseconds -just under 100000Hz
 */
-   if (value < 6 && !capable(CAP_SYS_ADMIN)) {
-   DRM_ERROR("Sampling period too high without 
root privileges\n");
+   if (value < i915_oa_min_timer_exponent &&
+   !capable(CAP_SYS_ADMIN)) {
+   DRM_ERROR("OA timer exponent too low without 
root privileges\n");
return -EACCES;
}
 
@@ -1471,6 +1479,15 @@ static struct ctl_table oa_table[] = {
 .extra1 = &zero,
 .extra2 = &one,
 },
+   {
+.procname = "oa_min_timer_exponent",
+.data = &i915_oa_min_timer_exponent,
+.maxlen = sizeof(i915_oa_min_timer_exponent),
+.mode = 0644,
+.proc_handler = proc_dointvec_minmax,
+.extra1 = &zero,
+.extra2 = &oa_exponent_max,
+},
{}
 };
 
-- 
2.10.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 10/11] drm/i915: Add more Haswell OA metric sets

2016-10-20 Thread Robert Bragg
This adds 'compute', 'compute extended', 'memory reads', 'memory writes'
and 'sampler balance' metric sets for Haswell.

The code is auto generated from an XML description of metric sets,
currently maintained in gputop, ref:

 https://github.com/rib/gputop
 > gputop-data/oa-*.xml
 > scripts/i915-perf-kernelgen.py

 $ make -C gputop-data -f Makefile.xml

Signed-off-by: Robert Bragg 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/i915_oa_hsw.c | 559 -
 1 file changed, 558 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.c 
b/drivers/gpu/drm/i915/i915_oa_hsw.c
index 19f272b..cd2a23a 100644
--- a/drivers/gpu/drm/i915/i915_oa_hsw.c
+++ b/drivers/gpu/drm/i915/i915_oa_hsw.c
@@ -31,9 +31,14 @@
 
 enum metric_set_id {
METRIC_SET_ID_RENDER_BASIC = 1,
+   METRIC_SET_ID_COMPUTE_BASIC,
+   METRIC_SET_ID_COMPUTE_EXTENDED,
+   METRIC_SET_ID_MEMORY_READS,
+   METRIC_SET_ID_MEMORY_WRITES,
+   METRIC_SET_ID_SAMPLER_BALANCE,
 };
 
-int i915_oa_n_builtin_metric_sets_hsw = 1;
+int i915_oa_n_builtin_metric_sets_hsw = 6;
 
 static const struct i915_oa_reg b_counter_config_render_basic[] = {
{ _MMIO(0x2724), 0x0080 },
@@ -112,6 +117,298 @@ get_render_basic_mux_config(struct drm_i915_private 
*dev_priv,
return mux_config_render_basic;
 }
 
+static const struct i915_oa_reg b_counter_config_compute_basic[] = {
+   { _MMIO(0x2710), 0x },
+   { _MMIO(0x2714), 0x0080 },
+   { _MMIO(0x2718), 0x },
+   { _MMIO(0x271c), 0x },
+   { _MMIO(0x2720), 0x },
+   { _MMIO(0x2724), 0x0080 },
+   { _MMIO(0x2728), 0x },
+   { _MMIO(0x272c), 0x },
+   { _MMIO(0x2740), 0x },
+   { _MMIO(0x2744), 0x },
+   { _MMIO(0x2748), 0x },
+   { _MMIO(0x274c), 0x },
+   { _MMIO(0x2750), 0x },
+   { _MMIO(0x2754), 0x },
+   { _MMIO(0x2758), 0x },
+   { _MMIO(0x275c), 0x },
+   { _MMIO(0x236c), 0x },
+};
+
+static const struct i915_oa_reg mux_config_compute_basic[] = {
+   { _MMIO(0x253a4), 0x },
+   { _MMIO(0x2681c), 0x01f00800 },
+   { _MMIO(0x26820), 0x1000 },
+   { _MMIO(0x2781c), 0x01f00800 },
+   { _MMIO(0x26520), 0x0007 },
+   { _MMIO(0x265a0), 0x0007 },
+   { _MMIO(0x25380), 0x0010 },
+   { _MMIO(0x2538c), 0x0030 },
+   { _MMIO(0x25384), 0xaa8a },
+   { _MMIO(0x25404), 0x },
+   { _MMIO(0x26800), 0x4202 },
+   { _MMIO(0x26808), 0x00605817 },
+   { _MMIO(0x2680c), 0x10001005 },
+   { _MMIO(0x26804), 0x },
+   { _MMIO(0x27800), 0x0102 },
+   { _MMIO(0x27808), 0x0c0701e0 },
+   { _MMIO(0x2780c), 0x000200a0 },
+   { _MMIO(0x27804), 0x },
+   { _MMIO(0x26484), 0x4400 },
+   { _MMIO(0x26704), 0x4400 },
+   { _MMIO(0x26500), 0x0006 },
+   { _MMIO(0x26510), 0x0001 },
+   { _MMIO(0x26504), 0x8800 },
+   { _MMIO(0x26580), 0x0006 },
+   { _MMIO(0x26590), 0x0020 },
+   { _MMIO(0x26584), 0x },
+   { _MMIO(0x26104), 0x5582 },
+   { _MMIO(0x26184), 0xaa86 },
+   { _MMIO(0x25420), 0x08320c83 },
+   { _MMIO(0x25424), 0x06820c83 },
+   { _MMIO(0x2541c), 0x },
+   { _MMIO(0x25428), 0x0c03 },
+};
+
+static const struct i915_oa_reg *
+get_compute_basic_mux_config(struct drm_i915_private *dev_priv,
+int *len)
+{
+   *len = ARRAY_SIZE(mux_config_compute_basic);
+   return mux_config_compute_basic;
+}
+
+static const struct i915_oa_reg b_counter_config_compute_extended[] = {
+   { _MMIO(0x2724), 0xf080 },
+   { _MMIO(0x2720), 0x },
+   { _MMIO(0x2714), 0xf080 },
+   { _MMIO(0x2710), 0x },
+   { _MMIO(0x2770), 0x0007fe2a },
+   { _MMIO(0x2774), 0xff00 },
+   { _MMIO(0x2778), 0x0007fe6a },
+   { _MMIO(0x277c), 0xff00 },
+   { _MMIO(0x2780), 0x0007fe92 },
+   { _MMIO(0x2784), 0xff00 },
+   { _MMIO(0x2788), 0x0007fea2 },
+   { _MMIO(0x278c), 0xff00 },
+   { _MMIO(0x2790), 0x0007fe32 },
+   { _MMIO(0x2794), 0xff00 },
+   { _MMIO(0x2798), 0x0007fe9a },
+   { _MMIO(0x279c), 0xff00 },
+   { _MMIO(0x27a0), 0x0007ff23 },
+   { _MMIO(0x27a4), 0xff00 },
+   { _MMIO(0x27a8), 0x0007fff3 },
+   { _MMIO(0x27ac), 0xfffe },
+};
+
+static const struct i915_oa_reg mux_config_compute_extended[] = {
+   { _MMIO(0x2681c), 0x3eb00800 },
+   { _MMIO(0x26820), 0x0090 },
+   { _MMIO(0x25384), 0x02aa },
+   { _MMIO(0x25404), 0x03ff },
+   { _MMIO(0x26800), 0x00142284 },
+   { _MMIO(0x26808), 0x0e629062 },
+   { _MMIO(0x2680c), 0x3f6f55cb },
+   { _MMIO(0x26810), 0x0014 },
+   { 

[Intel-gfx] [PATCH v6 05/11] drm/i915: Add 'render basic' Haswell OA unit config

2016-10-20 Thread Robert Bragg
Adds a static OA unit, MUX + B Counter configuration for basic render
metrics on Haswell. This is auto generated from an XML
description of metric sets, currently maintained in gputop, ref:

  https://github.com/rib/gputop
  > gputop-data/oa-*.xml
  > scripts/i915-perf-kernelgen.py

  $ make -C gputop-data -f Makefile.xml SYSFS=0 WHITELIST=RenderBasic

Signed-off-by: Robert Bragg 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/Makefile  |   3 +-
 drivers/gpu/drm/i915/i915_drv.h|  14 
 drivers/gpu/drm/i915/i915_oa_hsw.c | 144 +
 drivers/gpu/drm/i915/i915_oa_hsw.h |  34 +
 4 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_oa_hsw.c
 create mode 100644 drivers/gpu/drm/i915/i915_oa_hsw.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 8d4e25f..ac0c3ad 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -114,7 +114,8 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
 i915-y += i915_vgpu.o
 
 # perf code
-i915-y += i915_perf.o
+i915-y += i915_perf.o \
+ i915_oa_hsw.o
 
 ifeq ($(CONFIG_DRM_I915_GVT),y)
 i915-y += intel_gvt.o
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d3737c6..28f3f77 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1760,6 +1760,11 @@ struct intel_wm_config {
bool sprites_scaled;
 };
 
+struct i915_oa_reg {
+   i915_reg_t addr;
+   u32 value;
+};
+
 struct i915_perf_stream;
 
 struct i915_perf_stream_ops {
@@ -2142,6 +2147,15 @@ struct drm_i915_private {
bool initialized;
struct mutex lock;
struct list_head streams;
+
+   struct {
+   u32 metrics_set;
+
+   const struct i915_oa_reg *mux_regs;
+   int mux_regs_len;
+   const struct i915_oa_reg *b_counter_regs;
+   int b_counter_regs_len;
+   } oa;
} perf;
 
/* Abstract the submission mechanism (legacy ringbuffer or execlists) 
away */
diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.c 
b/drivers/gpu/drm/i915/i915_oa_hsw.c
new file mode 100644
index 000..8906380
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_oa_hsw.c
@@ -0,0 +1,144 @@
+/*
+ * Autogenerated file, DO NOT EDIT manually!
+ *
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "i915_oa_hsw.h"
+
+enum metric_set_id {
+   METRIC_SET_ID_RENDER_BASIC = 1,
+};
+
+int i915_oa_n_builtin_metric_sets_hsw = 1;
+
+static const struct i915_oa_reg b_counter_config_render_basic[] = {
+   { _MMIO(0x2724), 0x0080 },
+   { _MMIO(0x2720), 0x },
+   { _MMIO(0x2714), 0x0080 },
+   { _MMIO(0x2710), 0x },
+};
+
+static const struct i915_oa_reg mux_config_render_basic[] = {
+   { _MMIO(0x253a4), 0x0160 },
+   { _MMIO(0x25440), 0x0010 },
+   { _MMIO(0x25128), 0x },
+   { _MMIO(0x2691c), 0x0800 },
+   { _MMIO(0x26aa0), 0x0150 },
+   { _MMIO(0x26b9c), 0x6000 },
+   { _MMIO(0x2791c), 0x0800 },
+   { _MMIO(0x27aa0), 0x0150 },
+   { _MMIO(0x27b9c), 0x6000 },
+   { _MMIO(0x2641c), 0x0400 },
+   { _MMIO(0x25380), 0x0010 },
+   { _MMIO(0x2538c), 0x },
+   { _MMIO(0x25384), 0x0800 },
+   { _MMIO(0x25400), 0x0004 },
+   { _MMIO(0x2540c), 0x06029000 },
+   { _MMIO(0x25410), 0x0002 },
+   { _MMIO(0x25404), 0x5c30 },
+   { _MMIO(0x25100), 0x0016 },
+   { _MMIO(0x25110), 0x0400 },
+   { _MMIO(0x25104), 0x },
+   { _MMIO(0x26804), 0x1211 },
+   { 

[Intel-gfx] [PATCH v6 01/11] drm/i915: Add i915 perf infrastructure

2016-10-20 Thread Robert Bragg
Adds base i915 perf infrastructure for Gen performance metrics.

This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.

A stream is opened something like:

  uint64_t properties[] = {
  /* Single context sampling */
  DRM_I915_PERF_PROP_CTX_HANDLE,ctx_handle,

  /* Include OA reports in samples */
  DRM_I915_PERF_PROP_SAMPLE_OA, true,

  /* OA unit configuration */
  DRM_I915_PERF_PROP_OA_METRICS_SET,metrics_set_id,
  DRM_I915_PERF_PROP_OA_FORMAT, report_format,
  DRM_I915_PERF_PROP_OA_EXPONENT,   period_exponent,
   };
   struct drm_i915_perf_open_param parm = {
  .flags = I915_PERF_FLAG_FD_CLOEXEC |
   I915_PERF_FLAG_FD_NONBLOCK |
   I915_PERF_FLAG_DISABLED,
  .properties_ptr = (uint64_t)properties,
  .num_properties = sizeof(properties) / 16,
   };
   int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &parm);

Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.

No specific streams are supported yet so any attempt to open a stream
will return an error.

v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v2:
use i915_gem_context_get() - Chris Wilson

Signed-off-by: Robert Bragg 
---
 drivers/gpu/drm/i915/Makefile|   3 +
 drivers/gpu/drm/i915/i915_drv.c  |   4 +
 drivers/gpu/drm/i915/i915_drv.h  |  91 
 drivers/gpu/drm/i915/i915_perf.c | 443 +++
 include/uapi/drm/i915_drm.h  |  67 ++
 5 files changed, 608 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_perf.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 6123400..8d4e25f 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
 # virtual gpu code
 i915-y += i915_vgpu.o
 
+# perf code
+i915-y += i915_perf.o
+
 ifeq ($(CONFIG_DRM_I915_GVT),y)
 i915-y += intel_gvt.o
 include $(src)/gvt/Makefile
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 912d534..5449579 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -836,6 +836,8 @@ static int i915_driver_init_early(struct drm_i915_private 
*dev_priv,
 
intel_detect_preproduction_hw(dev_priv);
 
+   i915_perf_init(dev_priv);
+
return 0;
 
 err_workqueues:
@@ -849,6 +851,7 @@ static int i915_driver_init_early(struct drm_i915_private 
*dev_priv,
  */
 static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 {
+   i915_perf_fini(dev_priv);
i915_gem_load_cleanup(&dev_priv->drm);
i915_workqueues_cleanup(dev_priv);
 }
@@ -2575,6 +2578,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, 
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, 
i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, 
i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, 
DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5b2b7f3..d3737c6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1760,6 +1760,84 @@ struct intel_wm_config {
bool sprites_scaled;
 };
 
+struct i915_perf_stream;
+
+struct i915_perf_stream_ops {
+   /* Enables the collection of HW samples, either in response to
+* I915_PERF_IOCTL_ENABLE or implicitly called when stream is
+* opened without I915_PERF_FLAG_DISABLED.
+*/
+   void (*enable)(struct i915_perf_stream *stream);
+
+   /* Disables the collection of HW samples, either in response to
+* I915_PERF_IOCTL_DISABLE or implicitly called before
+* destroying the stream.
+*/
+   void (*disable)(struct i915_perf_stream *stream);
+
+   /* Return: true if any i915 perf records are ready to read()
+* for this stream.
+*/
+   bool (*can_read)(struct i915_perf_stream *stream);
+
+   /* Call poll_wait, passing a wait queue that will be woken
+* once there is something ready to read() for the stream
+*/
+   void (*poll_wait)(struct i915_perf_stream 

[Intel-gfx] [PATCH v6 02/11] drm/i915: rename OACONTROL GEN7_OACONTROL

2016-10-20 Thread Robert Bragg
OACONTROL changes quite a bit for gen8, with some bits split out into a
per-context OACTXCONTROL register. Rename now before adding more gen7 OA
registers.

Signed-off-by: Robert Bragg 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/gvt/handlers.c| 2 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c | 4 ++--
 drivers/gpu/drm/i915/i915_reg.h| 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c 
b/drivers/gpu/drm/i915/gvt/handlers.c
index 3e74fb3..68e07a1 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2159,7 +2159,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL);
 
MMIO_F(0x2290, 8, 0, 0, 0, D_HSW_PLUS, NULL, NULL);
-   MMIO_D(OACONTROL, D_HSW);
+   MMIO_D(GEN7_OACONTROL, D_HSW);
MMIO_D(0x2b00, D_BDW_PLUS);
MMIO_D(0x2360, D_BDW_PLUS);
MMIO_F(0x5200, 32, 0, 0, 0, D_ALL, NULL, NULL);
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index f191d7b..fe34470 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -450,7 +450,7 @@ static const struct drm_i915_reg_descriptor 
gen7_render_regs[] = {
REG64(PS_INVOCATION_COUNT),
REG64(PS_DEPTH_COUNT),
REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
-   REG32(OACONTROL), /* Only allowed for LRI and SRM. See below. */
+   REG32(GEN7_OACONTROL), /* Only allowed for LRI and SRM. See below. */
REG64(MI_PREDICATE_SRC0),
REG64(MI_PREDICATE_SRC1),
REG32(GEN7_3DPRIM_END_OFFSET),
@@ -1108,7 +1108,7 @@ static bool check_cmd(const struct intel_engine_cs 
*engine,
 * to the register. Hence, limit OACONTROL writes to
 * only MI_LOAD_REGISTER_IMM commands.
 */
-   if (reg_addr == i915_mmio_reg_offset(OACONTROL)) {
+   if (reg_addr == i915_mmio_reg_offset(GEN7_OACONTROL)) {
if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
DRM_DEBUG_DRIVER("CMD: Rejected LRM to 
OACONTROL\n");
return false;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 00efaa1..0ad7f03 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -615,7 +615,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define HSW_CS_GPR(n)   _MMIO(0x2600 + (n) * 8)
 #define HSW_CS_GPR_UDW(n)   _MMIO(0x2600 + (n) * 8 + 4)
 
-#define OACONTROL _MMIO(0x2360)
+#define GEN7_OACONTROL _MMIO(0x2360)
 
 #define _GEN7_PIPEA_DE_LOAD_SL 0x70068
 #define _GEN7_PIPEB_DE_LOAD_SL 0x71068
-- 
2.10.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 04/11] drm/i915: don't whitelist oacontrol in cmd parser

2016-10-20 Thread Robert Bragg
Being able to program OACONTROL from a non-privileged batch buffer is
not sufficient to be able to configure the OA unit. This was originally
allowed to help enable Mesa to expose OA counters via the
INTEL_performance_query extension, but the current implementation based
on programming OACONTROL via a batch buffer isn't able to report useable
data without a more complete OA unit configuration. Mesa handles the
possibility that writes to OACONTROL may not be allowed and so only
advertises the extension after explicitly testing that a write to
OACONTROL succeeds. Based on this, removing OACONTROL from the whitelist
should be ok for userspace.

Removing this simplifies adding a new kernel api for configuring the OA
unit without needing to consider the possibility that userspace might
trample on OACONTROL state which we'd like to start managing within
the kernel instead. In particular running any Mesa based GL application
currently results in clearing OACONTROL when initializing which would
disable the capturing of metrics.

Signed-off-by: Robert Bragg 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 38 ++
 1 file changed, 2 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index c45dd83..5152d6f 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -450,7 +450,6 @@ static const struct drm_i915_reg_descriptor 
gen7_render_regs[] = {
REG64(PS_INVOCATION_COUNT),
REG64(PS_DEPTH_COUNT),
REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
-   REG32(GEN7_OACONTROL), /* Only allowed for LRI and SRM. See below. */
REG64(MI_PREDICATE_SRC0),
REG64(MI_PREDICATE_SRC1),
REG32(GEN7_3DPRIM_END_OFFSET),
@@ -1060,8 +1059,7 @@ bool intel_engine_needs_cmd_parser(struct intel_engine_cs 
*engine)
 static bool check_cmd(const struct intel_engine_cs *engine,
  const struct drm_i915_cmd_descriptor *desc,
  const u32 *cmd, u32 length,
- const bool is_master,
- bool *oacontrol_set)
+ const bool is_master)
 {
if (desc->flags & CMD_DESC_SKIP)
return true;
@@ -1099,31 +1097,6 @@ static bool check_cmd(const struct intel_engine_cs 
*engine,
}
 
/*
-* OACONTROL requires some special handling for
-* writes. We want to make sure that any batch which
-* enables OA also disables it before the end of the
-* batch. The goal is to prevent one process from
-* snooping on the perf data from another process. To do
-* that, we need to check the value that will be written
-* to the register. Hence, limit OACONTROL writes to
-* only MI_LOAD_REGISTER_IMM commands.
-*/
-   if (reg_addr == i915_mmio_reg_offset(GEN7_OACONTROL)) {
-   if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
-   DRM_DEBUG_DRIVER("CMD: Rejected LRM to 
OACONTROL\n");
-   return false;
-   }
-
-   if (desc->cmd.value == MI_LOAD_REGISTER_REG) {
-   DRM_DEBUG_DRIVER("CMD: Rejected LRR to 
OACONTROL\n");
-   return false;
-   }
-
-   if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1))
-   *oacontrol_set = (cmd[offset + 1] != 0);
-   }
-
-   /*
 * Check the value written to the register against the
 * allowed mask/value pair given in the whitelist entry.
 */
@@ -1214,7 +1187,6 @@ int intel_engine_cmd_parser(struct intel_engine_cs 
*engine,
u32 *cmd, *batch_end;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = _desc;
-   bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
bool needs_clflush_after = false;
int ret = 0;
 
@@ -1270,8 +1242,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs 
*engine,
break;
}
 
-   if (!check_cmd(engine, desc, cmd, length, is_master,
-  _set)) {
+   if (!check_cmd(engine, desc, cmd, length, is_master)) {
ret = -EACCES;
break;
}
@@ -1279,11 +1250,6 @@ int intel_engine_cmd_parser(struct intel_engine_cs 
*engine,
cmd += length;
}
 
- 

[Intel-gfx] [PATCH v6 03/11] drm/i915: return EACCES for check_cmd() failures

2016-10-20 Thread Robert Bragg
check_cmd() is checking whether a command adheres to certain
restrictions that ensure it's safe to execute within a privileged batch
buffer. Returning false implies a privilege problem, not that the
command is invalid.

The distinction makes the difference between allowing the buffer to be
executed as an unprivileged batch buffer or returning an EINVAL error to
userspace without executing anything.

In a case where userspace may want to test whether it can successfully
write to a register that needs privileges the distinction may be
important and an EINVAL error may be considered fatal.

In particular this is currently true for Mesa, which includes a test for
whether OACONTROL can be written to, but Mesa treats any error when
flushing a batch buffer as fatal, calling exit(1).

As it is currently, Mesa can gracefully handle a failure to write to
OACONTROL if the command parser is disabled, but if we were to remove
OACONTROL from the parser's whitelist then the returned EINVAL would
break Mesa applications as they attempt an OACONTROL write.

This bumps the command parser version from 7 to 8, as the change is
visible to userspace.

Signed-off-by: Robert Bragg 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index fe34470..c45dd83 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1272,7 +1272,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs 
*engine,
 
if (!check_cmd(engine, desc, cmd, length, is_master,
   _set)) {
-   ret = -EINVAL;
+   ret = -EACCES;
break;
}
 
@@ -1333,6 +1333,9 @@ int i915_cmd_parser_get_version(struct drm_i915_private 
*dev_priv)
 * 5. GPGPU dispatch compute indirect registers.
 * 6. TIMESTAMP register and Haswell CS GPR registers
 * 7. Allow MI_LOAD_REGISTER_REG between whitelisted registers.
+* 8. Don't report cmd_check() failures as EINVAL errors to userspace;
+*rely on the HW to NOOP disallowed commands as it would without
+*the parser enabled.
 */
-   return 7;
+   return 8;
 }
-- 
2.10.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

2016-10-20 Thread Jani Nikula
On Thu, 20 Oct 2016, Imre Deak  wrote:
> On Thu, 2016-10-20 at 21:20 +0300, Jani Nikula wrote:
>> On Thu, 20 Oct 2016, Imre Deak  wrote:
>> > +  bool desc_valid;
>> > +  struct intel_dp_desc desc;
>> 
>> I guess we could cache the desc in intel_dp directly. Independent of
>> this patch.
>
> It's not used anywhere else, but I can move it to intel_dp.
>
>> 
>> Also, I'm wondering if we could stick with just aux here, and read
>> something else from dpcd instead.
>
> Not sure either, I picked desc since we read it out anyway during init.

That was my point with putting it in intel_dp. If it's read out anyway,
lspcon or not, just cache it in intel_dp.

BR,
Jani.

-- 
Jani Nikula, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH i-g-t 0/3] Convert sh scripts to C variants.

2016-10-20 Thread Jani Nikula
On Thu, 20 Oct 2016, Marius Vlad  wrote:
> This series adds some library support to help converting sh
> scripts to C version. Converted drv_module_reload_basic and
> kms_sysfs_edid_timing.

>  18 files changed, 600 insertions(+), 180 deletions(-)

Someone please justify this, plus pulling in two new dependencies. I can
think of a thing or two, but it needs to be in the commit messages. And
I'm not convinced by the justification I came up with.

BR,
Jani.


-- 
Jani Nikula, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 12/41] drm/i915: Introduce an internal allocator for disposable private objects

2016-10-20 Thread Chris Wilson
On Thu, Oct 20, 2016 at 05:22:23PM +0100, Tvrtko Ursulin wrote:
> 
> On 20/10/2016 16:03, Chris Wilson wrote:
> >Quite a few of our objects used for internal hardware programming do not
> >benefit from being swappable or from being zero initialised. As such
> >they do not benefit from using a shmemfs backing storage and since they
> >are internal and never directly exposed to the user, we do not need to
> >worry about providing a filp. For these we can use an
> >drm_i915_gem_object wrapper around a sg_table of plain struct page. They
> >are not swap backed and not automatically pinned. If they are reaped
> >by the shrinker, the pages are released and the contents discarded. For
> >the internal use case, this is fine as for example, ringbuffers are
> >pinned from being written by a request to be read by the hardware. Once
> >they are idle, they can be discarded entirely. As such they are a good
> >match for execlist ringbuffers and a small variety of other internal
> >objects.
> >
> >In the first iteration, this is limited to the scratch batch buffers we
> >use (for command parsing and state initialisation).
> 
> And the status page.

Yeah, I was just thinking of the runtime allocated blocks where the
change can be measured.

> >+max_order = MAX_ORDER;
> >+#ifdef CONFIG_SWIOTLB
> >+if (swiotlb_nr_tbl())
> >+max_order = min(max_order, ilog2(IO_TLB_SEGSIZE));
> >+#endif
> 
> I couldn't figure out what IO_TLB_SEGSIZE actually is in some
> minutes of cross-referencing. Did not seem to be in units of bytes
> according to swiotlb.h.

Pages.
 
> In either case my question is - why use different parameters than
> swiotlb_max_size you recently added to i915_gem.c?

I was trying to exploit the compile time constants, and I did not care
to grow the search for even higher orders.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

2016-10-20 Thread Jani Nikula
On Thu, 20 Oct 2016, Imre Deak  wrote:
> On Thu, 2016-10-20 at 22:24 +0300, Jani Nikula wrote:
>> On Thu, 20 Oct 2016, Jani Nikula  wrote:
>> > On Thu, 20 Oct 2016, Imre Deak  wrote:
>> > > On my APL the LSPCON firmware resumes in PCON mode as opposed to the
>> > > expected LS mode. It also appears to be in a state where AUX DPCD reads
>> > > will succeed but return garbage recovering only after a few hundreds of
>> > > milliseconds. After the recovery time DPCD reads will result in the
>> > > correct values and things will continue to work. If I2C over AUX is
>> > > attempted during this recovery time (implying an AUX write transaction)
>> > > the firmware won't recover and will stay in this broken state.
>> > > 
>> > > As a workaround check if the firmware is in PCON state after resume and
>> > > if so wait until the correct DPCD values are returned. For this we
>> > > compare the branch descriptor with the one we cached during init time.
>> > > If the firmware was in the LS state, we skip the w/a and continue as
>> > > before.
>> > > 
>> > > Cc: Shashank Sharma 
>> > > Cc: Ville Syrjälä 
>> > > Cc: Jani Nikula 
>> > > Signed-off-by: Imre Deak 
>> > > ---
>> > >  drivers/gpu/drm/i915/intel_dp.c |  2 +-
>> > >  drivers/gpu/drm/i915/intel_drv.h|  6 -
>> > >  drivers/gpu/drm/i915/intel_lspcon.c | 52 
>> > > ++---
>> > >  3 files changed, 48 insertions(+), 12 deletions(-)
>> > > 
>> > > diff --git a/drivers/gpu/drm/i915/intel_dp.c 
>> > > b/drivers/gpu/drm/i915/intel_dp.c
>> > > index e90211e..ec031db 100644
>> > > --- a/drivers/gpu/drm/i915/intel_dp.c
>> > > +++ b/drivers/gpu/drm/i915/intel_dp.c
>> > > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
>> > >  intel_dp->DP = DP;
>> > >  }
>> > >  
>> > > -static bool
>> > > +bool
>> > >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
>> > >  {
>> > >  if (drm_dp_dpcd_read(_dp->aux, 0x000, intel_dp->dpcd,
>> > > diff --git a/drivers/gpu/drm/i915/intel_drv.h 
>> > > b/drivers/gpu/drm/i915/intel_drv.h
>> > > index a35e241..9a2366e 100644
>> > > --- a/drivers/gpu/drm/i915/intel_drv.h
>> > > +++ b/drivers/gpu/drm/i915/intel_drv.h
>> > > @@ -972,7 +972,9 @@ struct intel_dp {
>> > >  struct intel_lspcon {
>> > >  bool active;
>> > >  enum drm_lspcon_mode mode;
>> > > -struct drm_dp_aux *aux;
>> > > +struct intel_dp *intel_dp;
>> > > +bool desc_valid;
>> > > +struct intel_dp_desc desc;
>> > 
>> > I guess we could cache the desc in intel_dp directly. Independent of
>> > this patch.
>> > 
>> > Also, I'm wondering if we could stick with just aux here, and read
>> > something else from dpcd instead.
>> > 
>> > >  };
>> > >  
>> > >  struct intel_digital_port {
>> > > @@ -1469,6 +1471,8 @@ static inline unsigned int 
>> > > intel_dp_unused_lane_mask(int lane_count)
>> > >  }
>> > >  
>> > >  bool
>> > > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
>> > > +bool
>> > >  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc 
>> > > *desc);
>> > >  void
>> > >  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc 
>> > > *desc);
>> > > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c 
>> > > b/drivers/gpu/drm/i915/intel_lspcon.c
>> > > index d2c8cb2..54c6173 100644
>> > > --- a/drivers/gpu/drm/i915/intel_lspcon.c
>> > > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
>> > > @@ -30,7 +30,7 @@
>> > >  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon 
>> > > *lspcon)
>> > >  {
>> > >  enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
>> > > -struct i2c_adapter *adapter = >aux->ddc;
>> > > +struct i2c_adapter *adapter = >intel_dp->aux.ddc;
>> > >  
>> > >  if (drm_lspcon_get_mode(adapter, &current_mode))
>> > >  DRM_ERROR("Error reading LSPCON mode\n");
>> > > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon 
>> > > *lspcon,
>> > >  {
>> > >  int err;
>> > >  enum drm_lspcon_mode current_mode;
>> > > -struct i2c_adapter *adapter = >aux->ddc;
>> > > +struct i2c_adapter *adapter = >intel_dp->aux.ddc;
>> > >  
>> > >  err = drm_lspcon_get_mode(adapter, &current_mode);
>> > >  if (err) {
>> > > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon 
>> > > *lspcon,
>> > >  static bool lspcon_probe(struct intel_lspcon *lspcon)
>> > >  {
>> > >  enum drm_dp_dual_mode_type adaptor_type;
>> > > -struct i2c_adapter *adapter = >aux->ddc;
>> > > +struct i2c_adapter *adapter = >intel_dp->aux.ddc;
>> > >  
>> > >  /* Lets probe the adaptor and check its type */
>> > >  adaptor_type = drm_dp_dual_mode_detect(adapter);
>> > > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon 

Re: [Intel-gfx] [PATCH 1/2] drm/i915: Move down skl/kbl ddi iboost and n_edp_entires fixup

2016-10-20 Thread Manasi Navare
On Fri, Sep 30, 2016 at 11:05:56AM -0700, Rodrigo Vivi wrote:
> No functional change.
> Only moving this fixup block out of ddi_translation definitions
> so we can split skl and kbl cleanly.
> 
> Cc: Manasi Navare 
> Signed-off-by: Rodrigo Vivi 
>
> ---
>  drivers/gpu/drm/i915/intel_ddi.c | 21 -
>  1 file changed, 12 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ddi.c 
> b/drivers/gpu/drm/i915/intel_ddi.c
> index 35f0b7c..018964b 100644
> --- a/drivers/gpu/drm/i915/intel_ddi.c
> +++ b/drivers/gpu/drm/i915/intel_ddi.c
> @@ -436,15 +436,6 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder 
> *encoder)
>   skl_get_buf_trans_dp(dev_priv, _dp_entries);
>   ddi_translations_edp =
>   skl_get_buf_trans_edp(dev_priv, _edp_entries);
> -
> - /* If we're boosting the current, set bit 31 of trans1 */
> - if (dev_priv->vbt.ddi_port_info[port].dp_boost_level)
> - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE;
> -
> - if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP &&
> - port != PORT_A && port != PORT_E &&
> - n_edp_entries > 9))
> - n_edp_entries = 9;
>   } else if (IS_BROADWELL(dev_priv)) {
>   ddi_translations_fdi = bdw_ddi_translations_fdi;
>   ddi_translations_dp = bdw_ddi_translations_dp;
> @@ -464,6 +455,18 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder 
> *encoder)
>   n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
>   }
>  
> + /* Skylake/Kabylake iboost and edp_entries fixup */
> + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
> + /* If we're boosting the current, set bit 31 of trans1 */
> + if (dev_priv->vbt.ddi_port_info[port].dp_boost_level)
> + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE;
> +
> + if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP &&
> + port != PORT_A && port != PORT_E &&
> + n_edp_entries > 9))
> + n_edp_entries = 9;
> + }
> +
>   switch (encoder->type) {
>   case INTEL_OUTPUT_EDP:
>   ddi_translations = ddi_translations_edp;
> -- 
> 1.9.1
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH i-g-t 1/3] lib/{igt_sysfs, igt_aux}: Make available to other users kick_fbcon() (unbind_fbcon()), and added helpers to igt_aux.

2016-10-20 Thread Chris Wilson
On Thu, Oct 20, 2016 at 10:36:47PM +0300, Marius Vlad wrote:
> +int
> +igt_pkill(int sig, const char *comm)
> +{
> + int err = 0, try = 5;
> + PROCTAB *proc;
> + proc_t *proc_info;
> +
> + proc = openproc(PROC_FILLCOM | PROC_FILLSTAT | PROC_FILLARG);
> + igt_assert(proc != NULL);
> +
> + while ((proc_info = readproc(proc, NULL))) {
> + if (proc_info &&

proc_info cannot be NULL, you've already tested for that.

> + !strncasecmp(proc_info->cmd, comm, sizeof(proc_info->cmd))) 
> {
> + switch (sig) {
> + case SIGTERM:
> + case SIGKILL:
> + do {
> + kill(proc_info->tid, sig);
> + } while (kill(proc_info->tid, 0) < 0 && try--);
> +
> + if (kill(proc_info->tid, 0) < 0)
> + err = -1;

Not convinced this is good behaviour for an API, to repeatedly call
kill(SIGTERM) until bored. If the function didn't take an int sig and was
called igt_terminate_process(const char *name), then repeating a few
SIGTERMs before sending SIGKILL would make sense. But as it is, named like
kill(), I expect this to send exactly one signal.

> +/**
> + * igt_kill:
> + * @sig: Signal to send.
> + * @pid: Process pid to send.
> + * @returns: 0 in case of success or -1 otherwise.
> + *
> + * This function is identical to igt_pkill() only that it searches the 
> process
> + * table using @pid instead of comm name.

There's a function called kill() that does exactly that, you even use it
here ;)

> +int
> +igt_rmmod(const char *mod_name, bool force)
> +{
> + struct kmod_ctx *ctx;
> + struct kmod_module *kmod;
> + int err, flags = 0;
> +
> + ctx = kmod_new(NULL, NULL);
> + igt_assert(ctx != NULL);
> +
> + err = kmod_module_new_from_name(ctx, mod_name, &kmod);
> + if (err < 0) {
> + igt_info("Could not use module %s (%s)\n", mod_name,
> + strerror(-err));
> + err = -1;
> + goto out;
> + }
> +
> + if (igt_module_in_use(kmod)) {
> + igt_info("Module %s is in use\n", mod_name);
> + err = -1;
> + goto out;
> + }

Pointless (this is redundant).

> +
> + if (force) {
> + flags |= KMOD_REMOVE_FORCE;

Would it not be wiser (and more future-proof) just to pass flags from the caller?

> + }
> +
> + err = kmod_module_remove_module(kmod, flags);
> + if (err < 0) {
> + igt_info("Could not remove module %s (%s)\n", mod_name,
> + strerror(-err));
> + err = -1;
> + }
> +
> +out:
> + kmod_module_unref(kmod);
> + kmod_unref(ctx);
> +
> + return err;
> +}

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH i-g-t 2/3] tests/drv_module_reload_basic: Convert sh script to C version.

2016-10-20 Thread Chris Wilson
On Thu, Oct 20, 2016 at 10:36:48PM +0300, Marius Vlad wrote:
> +static const char *tests[] = {
> + "gem_alive", "gem_exec_store"
> +};

gem_alive is just a single ioctl query, simpler and more obvious to do
it inline. Then remove tests/gem_alive.c, but it may live on as
tools/gem_alive.c (or better yet tools/gem_info.c).

gem_exec_store is a couple of ioctls...

A rewritten C test should not be i915 specific if we can help it. The
core of it can be driver agnostic (same steps required to unbind from
console and reload after all).

> +
> +static int
> +reload(const char *opts_i915)
> +{
> + kick_fbcon(0);
> +
> + if (opts_i915)
> + igt_info("Reloading i915 with %s\n\n", opts_i915);
> +
> + if (igt_lsmod_has_module("snd_hda_intel")) {
> + if (igt_pkill(SIGTERM, "alsactl") == -1) {
> + return IGT_EXIT_FAILURE;
> + }
> + if (igt_rmmod("snd_hda_intel", false) == -1)
> + return IGT_EXIT_FAILURE;
> + }
> +
> + /* gen5 */
> + if (igt_lsmod_has_module("intel_ips")) {
> + igt_rmmod("intel_ips", false);
> + }
> +
> + if (igt_rmmod("i915", false) == -1) {
> + return IGT_EXIT_SKIP;
> + }

Ugh. These names leave much to be desired.

igt_kmod_load()
igt_kmod_unload()
igt_kmod_is_loaded() (can return refcnt >= 0 and -1 for unloaded)

> +
> + igt_info("i915.ko has been unloaded!\n");
> +
> + if (igt_lsmod_has_module("intel-gtt")) {
> + igt_rmmod("intel-gtt", false);
> + }
> +
> + igt_rmmod("drm_kms_helper", false);
> + igt_rmmod("drm", false);
> +
> + if (igt_lsmod_has_module("i915")) {
> + igt_info("WARNING: i915.ko still loaded!\n");
> + return IGT_EXIT_FAILURE;
> + } else {
> + igt_info("module successfully unloaded\n");
> + }
> +
> + /* we do not have automatic loading of dependencies */
> + igt_insmod("drm", NULL);
> + igt_insmod("drm_kms_helper", NULL);
> +
> + if (igt_insmod("i915", opts_i915) == -1) {
> + igt_info("Could not load i915\n");
> + return IGT_EXIT_FAILURE;
> + }
> +
> + kick_fbcon(1);
> +
> + if (igt_insmod("snd_hda_intel", NULL) == -1)
> + return IGT_EXIT_FAILURE;
> +
> + return IGT_EXIT_SUCCESS;
> +}
> +
> +static void
> +igt_execv(char **argv)
> +{
> + igt_fork(child, 1) {
> + if (execv(argv[0], argv) < 0) {
> + igt_info("Failed to exec %s\n",
> + argv[0]);
> + exit(IGT_EXIT_FAILURE);
> + }
> + }
> + igt_waitchildren();
> +}
> +
> +static void
> +finish_load(char *dirname)
> +{
> + char buf[PATH_MAX];
> + char *__argv[2] = { buf, NULL };
> +
> + memset(buf, 0, PATH_MAX);
> + snprintf(buf, sizeof(buf), "%s/%s", dirname, tests[0]);
> +
> + igt_execv(__argv);
> +
> + memset(buf, 0, sizeof(buf));
> + snprintf(buf, sizeof(buf), "%s/%s", dirname, tests[1]);
> +
> + igt_execv(__argv);
> +}
> +
> +int main(int argc, char *argv[])

igt_main
{

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

2016-10-20 Thread Imre Deak
On Thu, 2016-10-20 at 22:24 +0300, Jani Nikula wrote:
> On Thu, 20 Oct 2016, Jani Nikula  wrote:
> > On Thu, 20 Oct 2016, Imre Deak  wrote:
> > > On my APL the LSPCON firmware resumes in PCON mode as opposed to the
> > > expected LS mode. It also appears to be in a state where AUX DPCD reads
> > > will succeed but return garbage recovering only after a few hundreds of
> > > milliseconds. After the recovery time DPCD reads will result in the
> > > correct values and things will continue to work. If I2C over AUX is
> > > attempted during this recovery time (implying an AUX write transaction)
> > > the firmware won't recover and will stay in this broken state.
> > > 
> > > As a workaround check if the firmware is in PCON state after resume and
> > > if so wait until the correct DPCD values are returned. For this we
> > > compare the branch descriptor with the one we cached during init time.
> > > If the firmware was in the LS state, we skip the w/a and continue as
> > > before.
> > > 
> > > Cc: Shashank Sharma 
> > > Cc: Ville Syrjälä 
> > > Cc: Jani Nikula 
> > > Signed-off-by: Imre Deak 
> > > ---
> > >  drivers/gpu/drm/i915/intel_dp.c |  2 +-
> > >  drivers/gpu/drm/i915/intel_drv.h|  6 -
> > >  drivers/gpu/drm/i915/intel_lspcon.c | 52 
> > > ++---
> > >  3 files changed, 48 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_dp.c 
> > > b/drivers/gpu/drm/i915/intel_dp.c
> > > index e90211e..ec031db 100644
> > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
> > >   intel_dp->DP = DP;
> > >  }
> > >  
> > > -static bool
> > > +bool
> > >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
> > >  {
> > >   if (drm_dp_dpcd_read(_dp->aux, 0x000, intel_dp->dpcd,
> > > diff --git a/drivers/gpu/drm/i915/intel_drv.h 
> > > b/drivers/gpu/drm/i915/intel_drv.h
> > > index a35e241..9a2366e 100644
> > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > @@ -972,7 +972,9 @@ struct intel_dp {
> > >  struct intel_lspcon {
> > >   bool active;
> > >   enum drm_lspcon_mode mode;
> > > - struct drm_dp_aux *aux;
> > > + struct intel_dp *intel_dp;
> > > + bool desc_valid;
> > > + struct intel_dp_desc desc;
> > 
> > I guess we could cache the desc in intel_dp directly. Independent of
> > this patch.
> > 
> > Also, I'm wondering if we could stick with just aux here, and read
> > something else from dpcd instead.
> > 
> > >  };
> > >  
> > >  struct intel_digital_port {
> > > @@ -1469,6 +1471,8 @@ static inline unsigned int 
> > > intel_dp_unused_lane_mask(int lane_count)
> > >  }
> > >  
> > >  bool
> > > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> > > +bool
> > >  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc 
> > > *desc);
> > >  void
> > >  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc 
> > > *desc);
> > > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c 
> > > b/drivers/gpu/drm/i915/intel_lspcon.c
> > > index d2c8cb2..54c6173 100644
> > > --- a/drivers/gpu/drm/i915/intel_lspcon.c
> > > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> > > @@ -30,7 +30,7 @@
> > >  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon 
> > > *lspcon)
> > >  {
> > >   enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
> > > - struct i2c_adapter *adapter = >aux->ddc;
> > > + struct i2c_adapter *adapter = >intel_dp->aux.ddc;
> > >  
> > >   if (drm_lspcon_get_mode(adapter, &current_mode))
> > >   DRM_ERROR("Error reading LSPCON mode\n");
> > > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon 
> > > *lspcon,
> > >  {
> > >   int err;
> > >   enum drm_lspcon_mode current_mode;
> > > - struct i2c_adapter *adapter = >aux->ddc;
> > > + struct i2c_adapter *adapter = >intel_dp->aux.ddc;
> > >  
> > >   err = drm_lspcon_get_mode(adapter, &current_mode);
> > >   if (err) {
> > > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon 
> > > *lspcon,
> > >  static bool lspcon_probe(struct intel_lspcon *lspcon)
> > >  {
> > >   enum drm_dp_dual_mode_type adaptor_type;
> > > - struct i2c_adapter *adapter = >aux->ddc;
> > > + struct i2c_adapter *adapter = >intel_dp->aux.ddc;
> > >  
> > >   /* Lets probe the adaptor and check its type */
> > >   adaptor_type = drm_dp_dual_mode_detect(adapter);
> > > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
> > >   return true;
> > >  }
> > >  
> > > +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
> > > +{
> > > + unsigned long start = jiffies;
> > > +
> > > + if (!lspcon->desc_valid)
> > > + return;
> > > +
> > > + while (1) {
> > > + struct intel_dp_desc desc;
> > > +
> > > + /*
> > > +  

[Intel-gfx] [PATCH i-g-t 1/3] lib/{igt_sysfs, igt_aux}: Make available to other users kick_fbcon() (unbind_fbcon()), and added helpers to igt_aux.

2016-10-20 Thread Marius Vlad
Previously under unbind_fbcon(), disable/enable framebuffer console.

lib/igt_aux: Added helpers to help convert sh scripts to C version. libkmod and
procps interface.

Signed-off-by: Marius Vlad 
---
 configure.ac|   2 +
 lib/Makefile.am |   2 +
 lib/igt_aux.c   | 278 
 lib/igt_aux.h   |   7 ++
 lib/igt_gvt.c   |  43 +
 lib/igt_sysfs.c |  54 +++
 lib/igt_sysfs.h |   2 +
 7 files changed, 347 insertions(+), 41 deletions(-)

diff --git a/configure.ac b/configure.ac
index 735cfd5..2c6e49d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -121,6 +121,8 @@ AC_SUBST(ASSEMBLER_WARN_CFLAGS)
 
 PKG_CHECK_MODULES(DRM, [libdrm])
 PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10])
+PKG_CHECK_MODULES(KMOD, [libkmod])
+PKG_CHECK_MODULES(PROCPS, [libprocps])
 
 case "$target_cpu" in
x86*|i?86)
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 4c0893d..e1737bd 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -34,6 +34,8 @@ AM_CFLAGS += $(CAIRO_CFLAGS)
 libintel_tools_la_LIBADD = \
$(DRM_LIBS) \
$(PCIACCESS_LIBS) \
+   $(PROCPS_LIBS) \
+   $(KMOD_LIBS) \
$(CAIRO_LIBS) \
$(LIBUDEV_LIBS) \
$(LIBUNWIND_LIBS) \
diff --git a/lib/igt_aux.c b/lib/igt_aux.c
index 421f6d4..d150818 100644
--- a/lib/igt_aux.c
+++ b/lib/igt_aux.c
@@ -51,6 +51,9 @@
 #include 
 #include 
 
+#include 
+#include 
+
 #include "drmtest.h"
 #include "i915_drm.h"
 #include "intel_chipset.h"
@@ -1193,6 +1196,281 @@ void igt_set_module_param_int(const char *name, int val)
igt_set_module_param(name, str);
 }
 
+/**
+ * igt_pkill:
+ * @sig: Signal to send
+ * @name: Name of process in the form found in /proc/pid/comm (limited to 15
+ * chars)
+ * @returns: 0 in case the process is not found running or the signal has been
+ * sent successfully or -1 otherwise.
+ *
+ * This function sends the signal @sig for a process found in process table
+ * with name @comm.
+ */
+int
+igt_pkill(int sig, const char *comm)
+{
+   int err = 0, try = 5;
+   PROCTAB *proc;
+   proc_t *proc_info;
+
+   proc = openproc(PROC_FILLCOM | PROC_FILLSTAT | PROC_FILLARG);
+   igt_assert(proc != NULL);
+
+   while ((proc_info = readproc(proc, NULL))) {
+   if (proc_info &&
+   !strncasecmp(proc_info->cmd, comm, sizeof(proc_info->cmd))) 
{
+   switch (sig) {
+   case SIGTERM:
+   case SIGKILL:
+   do {
+   kill(proc_info->tid, sig);
+   } while (kill(proc_info->tid, 0) < 0 && try--);
+
+   if (kill(proc_info->tid, 0) < 0)
+   err = -1;
+   break;
+   default:
+   if (kill(proc_info->tid, sig) < 0)
+   err = -1;
+   break;
+   }
+
+   freeproc(proc_info);
+   break;
+   }
+   freeproc(proc_info);
+   }
+
+   closeproc(proc);
+   return err;
+}
+
+/**
+ * igt_kill:
+ * @sig: Signal to send.
+ * @pid: Process pid to send.
+ * @returns: 0 in case of success or -1 otherwise.
+ *
+ * This function is identical to igt_pkill() only that it searches the process
+ * table using @pid instead of comm name.
+ *
+ */
+int
+igt_kill(int sig, pid_t pid)
+{
+   int err = 0, try = 5;
+   PROCTAB *proc;
+   proc_t *proc_info;
+
+   proc = openproc(PROC_PID | PROC_FILLSTAT | PROC_FILLARG);
+   igt_assert(proc != NULL);
+
+   while ((proc_info = readproc(proc, NULL))) {
+   if (proc_info && proc_info->tid == pid) {
+   switch (sig) {
+   case SIGTERM:
+   case SIGKILL:
+   do {
+   kill(proc_info->tid, sig);
+   } while (kill(proc_info->tid, 0) < 0 && try--);
+
+   if (kill(proc_info->tid, 0) < 0)
+   err = -1;
+   break;
+   default:
+   if (kill(proc_info->tid, sig) < 0)
+   err = -1;
+   break;
+   }
+   freeproc(proc_info);
+   break;
+   }
+   freeproc(proc_info);
+   }
+
+   closeproc(proc);
+   return err;
+}
+
+static bool
+igt_module_in_use(struct kmod_module *kmod)
+{
+   int err;
+
+   err = kmod_module_get_initstate(kmod);
+
+   /* if compiled builtin or does not exist */
+   if (err == KMOD_MODULE_BUILTIN || err < 0)
+   return 

[Intel-gfx] [PATCH i-g-t 0/3] Convert sh scripts to C variants.

2016-10-20 Thread Marius Vlad
This series adds some library support to help converting sh
scripts to C version. Converted drv_module_reload_basic and
kms_sysfs_edid_timing.

Marius Vlad (3):
  lib/igt_sysfs: Make available to other users kick_fbcon() function
(previously under unbind_fbcon()), to disable/enable framebuffer console.
  lib/igt_aux: Added helpers to help convert sh scripts to C version (libkmod
  and procps interface).
  tests/drv_module_reload_basic: Convert sh script to C version.
  tests/kms_sysfs_edid_timing: Convert sh script to C version.

 benchmarks/gem_syslatency.c |   4 -
 configure.ac|   2 +
 lib/Makefile.am |   2 +
 lib/igt_aux.c   | 278 
 lib/igt_aux.h   |   7 +
 lib/igt_core.c  |   3 -
 lib/igt_core.h  |   3 +
 lib/igt_gvt.c   |  43 +--
 lib/igt_sysfs.c |  54 
 lib/igt_sysfs.h |   2 +
 tests/Makefile.sources  |   4 +-
 tests/drv_hangman.c |   1 -
 tests/drv_module_reload_basic   |  97 --
 tests/drv_module_reload_basic.c | 166 
 tests/gem_wait.c|   4 -
 tests/kms_flip.c|   3 -
 tests/kms_sysfs_edid_timing |  25 
 tests/kms_sysfs_edid_timing.c   |  82 
 18 files changed, 600 insertions(+), 180 deletions(-)
 delete mode 100755 tests/drv_module_reload_basic
 create mode 100644 tests/drv_module_reload_basic.c
 delete mode 100755 tests/kms_sysfs_edid_timing
 create mode 100644 tests/kms_sysfs_edid_timing.c

-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t 3/3] tests/kms_sysfs_edid_timing: Convert sh script to C version.

2016-10-20 Thread Marius Vlad
While at it, make the time macros available to other users.

Signed-off-by: Marius Vlad 
---
 benchmarks/gem_syslatency.c   |  4 ---
 lib/igt_core.c|  3 --
 lib/igt_core.h|  3 ++
 tests/Makefile.sources|  2 +-
 tests/drv_hangman.c   |  1 -
 tests/gem_wait.c  |  4 ---
 tests/kms_flip.c  |  3 --
 tests/kms_sysfs_edid_timing   | 25 -
 tests/kms_sysfs_edid_timing.c | 82 +++
 9 files changed, 86 insertions(+), 41 deletions(-)
 delete mode 100755 tests/kms_sysfs_edid_timing
 create mode 100644 tests/kms_sysfs_edid_timing.c

diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
index 6cad3a0..83bfac7 100644
--- a/benchmarks/gem_syslatency.c
+++ b/benchmarks/gem_syslatency.c
@@ -133,10 +133,6 @@ static void *gem_busyspin(void *arg)
return NULL;
 }
 
-#define MSEC_PER_SEC (1000)
-#define USEC_PER_SEC (1000 * MSEC_PER_SEC)
-#define NSEC_PER_SEC (1000 * USEC_PER_SEC)
-
 static double elapsed(const struct timespec *a, const struct timespec *b)
 {
return 1e9*(b->tv_sec - a->tv_sec) + (b->tv_nsec - a ->tv_nsec);
diff --git a/lib/igt_core.c b/lib/igt_core.c
index 9cd5f98..f64c809 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -398,9 +398,6 @@ error:
return -errno;
 }
 
-#define MSEC_PER_SEC (1000)
-#define USEC_PER_SEC (1000*MSEC_PER_SEC)
-#define NSEC_PER_SEC (1000*USEC_PER_SEC)
 uint64_t igt_nsec_elapsed(struct timespec *start)
 {
struct timespec now;
diff --git a/lib/igt_core.h b/lib/igt_core.h
index 03be757..a45e334 100644
--- a/lib/igt_core.h
+++ b/lib/igt_core.h
@@ -847,6 +847,9 @@ extern enum igt_log_level igt_log_level;
 void igt_set_timeout(unsigned int seconds,
 const char *op);
 
+#define MSEC_PER_SEC (1000)
+#define USEC_PER_SEC (1000*MSEC_PER_SEC)
+#define NSEC_PER_SEC (1000*USEC_PER_SEC)
 /**
  * igt_nsec_elapsed:
  * @start: measure from this point in time
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index c35ea11..969ef0b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -210,6 +210,7 @@ TESTS_progs = \
kms_mmap_write_crc \
kms_pwrite_crc \
kms_sink_crc_basic \
+   kms_sysfs_edid_timing \
prime_udl \
drv_module_reload_basic \
$(NULL)
@@ -222,7 +223,6 @@ TESTS_scripts_M = \
 TESTS_scripts = \
debugfs_emon_crash \
drv_debugfs_reader \
-   kms_sysfs_edid_timing \
sysfs_l3_parity \
test_rte_check \
tools_test \
diff --git a/tests/drv_hangman.c b/tests/drv_hangman.c
index 953a4c6..19d809c 100644
--- a/tests/drv_hangman.c
+++ b/tests/drv_hangman.c
@@ -282,7 +282,6 @@ static void test_error_state_capture(unsigned ring_id,
  * case and it takes a lot more time to wrap, so the acthd can potentially keep
  * increasing for a long time
  */
-#define NSEC_PER_SEC   1000000000LL
 static void hangcheck_unterminated(void)
 {
int fd;
diff --git a/tests/gem_wait.c b/tests/gem_wait.c
index b4127de..db04958 100644
--- a/tests/gem_wait.c
+++ b/tests/gem_wait.c
@@ -83,10 +83,6 @@ static void sigiter(int sig, siginfo_t *info, void *arg)
__sync_synchronize();
 }
 
-#define MSEC_PER_SEC (1000)
-#define USEC_PER_SEC (1000 * MSEC_PER_SEC)
-#define NSEC_PER_SEC (1000 * USEC_PER_SEC)
-
 #define BUSY 1
 #define HANG 2
 static void basic(int fd, unsigned engine, unsigned flags)
diff --git a/tests/kms_flip.c b/tests/kms_flip.c
index 7646aaf..842bc3a 100644
--- a/tests/kms_flip.c
+++ b/tests/kms_flip.c
@@ -83,9 +83,6 @@
 #define DRM_CAP_TIMESTAMP_MONOTONIC 6
 #endif
 
-#define USEC_PER_SEC 1000000L
-#define NSEC_PER_SEC 1000000000L
-
 drmModeRes *resources;
 int drm_fd;
 static drm_intel_bufmgr *bufmgr;
diff --git a/tests/kms_sysfs_edid_timing b/tests/kms_sysfs_edid_timing
deleted file mode 100755
index 46ea540..0000000
--- a/tests/kms_sysfs_edid_timing
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-#
-# This check the time we take to read the content of all the possible connectors.
-# Without the edid -ENXIO patch (http://permalink.gmane.org/gmane.comp.video.dri.devel/62083),
-# we sometimes take a *really* long time. So let's just check for some reasonable timing here
-#
-
-DRM_LIB_ALLOW_NO_MASTER=1
-
-SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
-. $SOURCE_DIR/drm_lib.sh
-
-TIME1=$(date +%s%N)
-cat $(find /sys/devices/|grep drm | grep /status) > /dev/null
-TIME2=$(date +%s%N)
-
-# time in ms
-RES=$(((TIME2 - TIME1) / 1000000))
-
-if [ $RES -gt 600 ]; then
-   echo "Talking to outputs took ${RES}ms, something is wrong"
-   exit $IGT_EXIT_FAILURE
-fi
-
-exit $IGT_EXIT_SUCCESS
diff --git a/tests/kms_sysfs_edid_timing.c b/tests/kms_sysfs_edid_timing.c
new file mode 100644
index 0000000..8de4b78
--- /dev/null
+++ b/tests/kms_sysfs_edid_timing.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person 

[Intel-gfx] [PATCH i-g-t 2/3] tests/drv_module_reload_basic: Convert sh script to C version.

2016-10-20 Thread Marius Vlad
Signed-off-by: Marius Vlad 
---
 tests/Makefile.sources  |   2 +-
 tests/drv_module_reload_basic   |  97 ---
 tests/drv_module_reload_basic.c | 166 
 3 files changed, 167 insertions(+), 98 deletions(-)
 delete mode 100755 tests/drv_module_reload_basic
 create mode 100644 tests/drv_module_reload_basic.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 6d081c3..c35ea11 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -211,6 +211,7 @@ TESTS_progs = \
kms_pwrite_crc \
kms_sink_crc_basic \
prime_udl \
+   drv_module_reload_basic \
$(NULL)
 
 # IMPORTANT: The ZZ_ tests need to be run last!
@@ -221,7 +222,6 @@ TESTS_scripts_M = \
 TESTS_scripts = \
debugfs_emon_crash \
drv_debugfs_reader \
-   drv_module_reload_basic \
kms_sysfs_edid_timing \
sysfs_l3_parity \
test_rte_check \
diff --git a/tests/drv_module_reload_basic b/tests/drv_module_reload_basic
deleted file mode 100755
index a8d628d..0000000
--- a/tests/drv_module_reload_basic
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/bin/bash
-#
-# Testcase: Reload the drm module
-#
-# ... we've broken this way too often :(
-#
-
-SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
-. $SOURCE_DIR/drm_lib.sh
-
-# no other drm service should be running, so we can just unbind
-
-# return 0 if module by name $1 is loaded according to lsmod
-function mod_loaded()
-{
-   lsmod | grep -w "^$1" &> /dev/null
-}
-
-function reload() {
-   local snd_hda_intel_unloaded
-
-   echo Reloading i915.ko with $*
-
-   # we must kick away fbcon (but only fbcon)
-   for vtcon in /sys/class/vtconsole/vtcon*/ ; do
-   if grep "frame buffer device" $vtcon/name > /dev/null ; then
-   echo unbinding $vtcon: `cat $vtcon/name`
-   echo 0 > $vtcon/bind
-   fi
-   done
-
-   # The sound driver uses our power well
-   pkill alsactl
-   snd_hda_intel_unloaded=0
-   if mod_loaded snd_hda_intel; then
-   rmmod snd_hda_intel && snd_hda_intel_unloaded=1
-   fi
-
-   # gen5 only
-   if mod_loaded intel_ips; then
-   rmmod intel_ips
-   fi
-   rmmod i915 || return $IGT_EXIT_SKIP
-   #ignore errors in intel-gtt, often built-in
-   rmmod intel-gtt &> /dev/null
-   # drm may be used by other devices (nouveau, radeon, udl, etc)
-   rmmod drm_kms_helper &> /dev/null
-   rmmod drm &> /dev/null
-
-   if mod_loaded i915; then
-   echo WARNING: i915.ko still loaded!
-   return $IGT_EXIT_FAILURE
-   else
-   echo module successfully unloaded
-   fi
-
-   modprobe i915 $*
-
-   if [ -f /sys/class/vtconsole/vtcon1/bind ]; then
-   echo 1 > /sys/class/vtconsole/vtcon1/bind
-   fi
-
-   modprobe -q snd_hda_intel || return $snd_hda_intel_unloaded
-}
-
-function finish_load() {
-   # does the device exist?
-   if $SOURCE_DIR/gem_alive > /dev/null ; then
-   echo "module successfully loaded again"
-   else
-   echo "failed to reload module successfully"
-   return $IGT_EXIT_FAILURE
-   fi
-
-   # then try to run something
-   if ! $SOURCE_DIR/gem_exec_store > /dev/null ; then
-   echo "failed to execute a simple batch after reload"
-   return $IGT_EXIT_FAILURE
-   fi
-
-   return $IGT_EXIT_SUCCESS
-}
-
-hda_dynamic_debug_enable
-
-reload || exit $?
-finish_load || exit $?
-
-# Repeat the module reload trying to to generate faults
-for i in $(seq 1 4); do
-   reload inject_load_failure=$i
-done
-
-reload || exit $?
-finish_load
-
-exit $?
diff --git a/tests/drv_module_reload_basic.c b/tests/drv_module_reload_basic.c
new file mode 100644
index 0000000..d36afde
--- /dev/null
+++ b/tests/drv_module_reload_basic.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 

Re: [Intel-gfx] [PATCH 2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

2016-10-20 Thread Imre Deak
On Thu, 2016-10-20 at 21:20 +0300, Jani Nikula wrote:
> On Thu, 20 Oct 2016, Imre Deak  wrote:
> > On my APL the LSPCON firmware resumes in PCON mode as opposed to the
> > expected LS mode. It also appears to be in a state where AUX DPCD reads
> > will succeed but return garbage recovering only after a few hundreds of
> > milliseconds. After the recovery time DPCD reads will result in the
> > correct values and things will continue to work. If I2C over AUX is
> > attempted during this recovery time (implying an AUX write transaction)
> > the firmware won't recover and will stay in this broken state.
> > 
> > As a workaround check if the firmware is in PCON state after resume and
> > if so wait until the correct DPCD values are returned. For this we
> > compare the branch descriptor with the one we cached during init time.
> > If the firmware was in the LS state, we skip the w/a and continue as
> > before.
> > 
> > Cc: Shashank Sharma 
> > Cc: Ville Syrjälä 
> > Cc: Jani Nikula 
> > Signed-off-by: Imre Deak 
> > ---
> >  drivers/gpu/drm/i915/intel_dp.c |  2 +-
> >  drivers/gpu/drm/i915/intel_drv.h|  6 -
> >  drivers/gpu/drm/i915/intel_lspcon.c | 52 
> > ++---
> >  3 files changed, 48 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_dp.c 
> > b/drivers/gpu/drm/i915/intel_dp.c
> > index e90211e..ec031db 100644
> > --- a/drivers/gpu/drm/i915/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
> >     intel_dp->DP = DP;
> >  }
> >  
> > -static bool
> > +bool
> >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
> >  {
> >     if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h 
> > b/drivers/gpu/drm/i915/intel_drv.h
> > index a35e241..9a2366e 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -972,7 +972,9 @@ struct intel_dp {
> >  struct intel_lspcon {
> >     bool active;
> >     enum drm_lspcon_mode mode;
> > -   struct drm_dp_aux *aux;
> > +   struct intel_dp *intel_dp;
> > +   bool desc_valid;
> > +   struct intel_dp_desc desc;
> 
> I guess we could cache the desc in intel_dp directly. Independent of
> this patch.

It's not used anywhere else, but I can move it to intel_dp.

> 
> Also, I'm wondering if we could stick with just aux here, and read
> something else from dpcd instead.

Not sure either, I picked desc since we read it out anyway during init.

> 
> >  };
> >  
> >  struct intel_digital_port {
> > @@ -1469,6 +1471,8 @@ static inline unsigned int 
> > intel_dp_unused_lane_mask(int lane_count)
> >  }
> >  
> >  bool
> > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> > +bool
> >  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> >  void
> >  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c 
> > b/drivers/gpu/drm/i915/intel_lspcon.c
> > index d2c8cb2..54c6173 100644
> > --- a/drivers/gpu/drm/i915/intel_lspcon.c
> > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> > @@ -30,7 +30,7 @@
> >  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon 
> > *lspcon)
> >  {
> >     enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
> > -   struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > +   struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> >  
> >     if (drm_lspcon_get_mode(adapter, &current_mode))
> >     DRM_ERROR("Error reading LSPCON mode\n");
> > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> >  {
> >     int err;
> >     enum drm_lspcon_mode current_mode;
> > -   struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > +   struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> >  
> >     err = drm_lspcon_get_mode(adapter, &current_mode);
> >     if (err) {
> > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> >  static bool lspcon_probe(struct intel_lspcon *lspcon)
> >  {
> >     enum drm_dp_dual_mode_type adaptor_type;
> > -   struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > +   struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> >  
> >     /* Lets probe the adaptor and check its type */
> >     adaptor_type = drm_dp_dual_mode_detect(adapter);
> > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
> >     return true;
> >  }
> >  
> > +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
> > +{
> > +   unsigned long start = jiffies;
> > +
> > +   if (!lspcon->desc_valid)
> > +   return;
> > +
> > +   while (1) {
> > +   struct intel_dp_desc desc;
> > +
> > +   /*
> > +    * The w/a only applies in PCON mode and we don't expect any
> > +    * AUX errors.
> > +    */
> > +   if 

Re: [Intel-gfx] [PATCH 00/10] mm: adjust get_user_pages* functions to explicitly pass FOLL_* flags

2016-10-20 Thread Michal Hocko
On Wed 19-10-16 10:23:55, Dave Hansen wrote:
> On 10/19/2016 10:01 AM, Michal Hocko wrote:
> > The question I had earlier was whether this has to be an explicit FOLL
> > flag used by g-u-p users or we can just use it internally when mm !=
> > current->mm
> 
> The reason I chose not to do that was that deferred work gets run under
> a basically random 'current'.  If we just use 'mm != current->mm', then
> the deferred work will sometimes have pkeys enforced and sometimes not,
> basically randomly.

OK, I see (async_pf_execute and ksm). It makes more sense to me. Thanks
for the clarification.

-- 
Michal Hocko
SUSE Labs
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

2016-10-20 Thread Jani Nikula
On Thu, 20 Oct 2016, Jani Nikula  wrote:
> On Thu, 20 Oct 2016, Imre Deak  wrote:
>> On my APL the LSPCON firmware resumes in PCON mode as opposed to the
>> expected LS mode. It also appears to be in a state where AUX DPCD reads
>> will succeed but return garbage recovering only after a few hundreds of
>> milliseconds. After the recovery time DPCD reads will result in the
>> correct values and things will continue to work. If I2C over AUX is
>> attempted during this recovery time (implying an AUX write transaction)
>> the firmware won't recover and will stay in this broken state.
>>
>> As a workaround check if the firmware is in PCON state after resume and
>> if so wait until the correct DPCD values are returned. For this we
>> compare the branch descriptor with the one we cached during init time.
>> If the firmware was in the LS state, we skip the w/a and continue as
>> before.
>>
>> Cc: Shashank Sharma 
>> Cc: Ville Syrjälä 
>> Cc: Jani Nikula 
>> Signed-off-by: Imre Deak 
>> ---
>>  drivers/gpu/drm/i915/intel_dp.c |  2 +-
>>  drivers/gpu/drm/i915/intel_drv.h|  6 -
>>  drivers/gpu/drm/i915/intel_lspcon.c | 52 
>> ++---
>>  3 files changed, 48 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_dp.c 
>> b/drivers/gpu/drm/i915/intel_dp.c
>> index e90211e..ec031db 100644
>> --- a/drivers/gpu/drm/i915/intel_dp.c
>> +++ b/drivers/gpu/drm/i915/intel_dp.c
>> @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
>>  intel_dp->DP = DP;
>>  }
>>  
>> -static bool
>> +bool
>>  intel_dp_read_dpcd(struct intel_dp *intel_dp)
>>  {
>>  if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h 
>> b/drivers/gpu/drm/i915/intel_drv.h
>> index a35e241..9a2366e 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -972,7 +972,9 @@ struct intel_dp {
>>  struct intel_lspcon {
>>  bool active;
>>  enum drm_lspcon_mode mode;
>> -struct drm_dp_aux *aux;
>> +struct intel_dp *intel_dp;
>> +bool desc_valid;
>> +struct intel_dp_desc desc;
>
> I guess we could cache the desc in intel_dp directly. Independent of
> this patch.
>
> Also, I'm wondering if we could stick with just aux here, and read
> something else from dpcd instead.
>
>>  };
>>  
>>  struct intel_digital_port {
>> @@ -1469,6 +1471,8 @@ static inline unsigned int 
>> intel_dp_unused_lane_mask(int lane_count)
>>  }
>>  
>>  bool
>> +intel_dp_read_dpcd(struct intel_dp *intel_dp);
>> +bool
>>  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>>  void
>>  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>> diff --git a/drivers/gpu/drm/i915/intel_lspcon.c 
>> b/drivers/gpu/drm/i915/intel_lspcon.c
>> index d2c8cb2..54c6173 100644
>> --- a/drivers/gpu/drm/i915/intel_lspcon.c
>> +++ b/drivers/gpu/drm/i915/intel_lspcon.c
>> @@ -30,7 +30,7 @@
>>  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon 
>> *lspcon)
>>  {
>>  enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
>> -struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> +struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>>  
>>  if (drm_lspcon_get_mode(adapter, &current_mode))
>>  DRM_ERROR("Error reading LSPCON mode\n");
>> @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>>  {
>>  int err;
>>  enum drm_lspcon_mode current_mode;
>> -struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> +struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>>  
>>  err = drm_lspcon_get_mode(adapter, &current_mode);
>>  if (err) {
>> @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>>  static bool lspcon_probe(struct intel_lspcon *lspcon)
>>  {
>>  enum drm_dp_dual_mode_type adaptor_type;
>> -struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> +struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>>  
>>  /* Lets probe the adaptor and check its type */
>>  adaptor_type = drm_dp_dual_mode_detect(adapter);
>> @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
>>  return true;
>>  }
>>  
>> +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
>> +{
>> +unsigned long start = jiffies;
>> +
>> +if (!lspcon->desc_valid)
>> +return;
>> +
>> +while (1) {
>> +struct intel_dp_desc desc;
>> +
>> +/*
>> + * The w/a only applies in PCON mode and we don't expect any
>> + * AUX errors.
>> + */
>> +if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
>> +return;
>> +
>> +if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
>> +DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u 
>> ms\n",
>> 

Re: [Intel-gfx] [PATCH 1/2] drm/i915/dp: Print full branch/sink descriptor for all outputs

2016-10-20 Thread Imre Deak
On Thu, 2016-10-20 at 21:06 +0300, Jani Nikula wrote:
> On Thu, 20 Oct 2016, Imre Deak  wrote:
> > Extend the branch/sink descriptor info with the missing device ID
> > field and print this info for eDP and LSPCON connectors too.
> > 
> > Signed-off-by: Imre Deak 
> > ---
> >  drivers/gpu/drm/i915/intel_dp.c | 83 
> > +++--
> >  drivers/gpu/drm/i915/intel_drv.h| 13 ++
> >  drivers/gpu/drm/i915/intel_lspcon.c |  7 
> >  3 files changed, 53 insertions(+), 50 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_dp.c 
> > b/drivers/gpu/drm/i915/intel_dp.c
> > index 88f3b74..e90211e 100644
> > --- a/drivers/gpu/drm/i915/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > @@ -1442,42 +1442,34 @@ static void intel_dp_print_rates(struct intel_dp 
> > *intel_dp)
> >     DRM_DEBUG_KMS("common rates: %s\n", str);
> >  }
> >  
> > -static void intel_dp_print_hw_revision(struct intel_dp *intel_dp)
> > +static bool intel_dp_is_branch(struct intel_dp *intel_dp)
> 
> Belongs in drm dp helpers.

Ok.

> 
> >  {
> > -   uint8_t rev;
> > -   int len;
> > -
> > -   if ((drm_debug & DRM_UT_KMS) == 0)
> > -   return;
> > -
> > -   if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
> > -     DP_DWN_STRM_PORT_PRESENT))
> > -   return;
> > -
> > -   len = drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_HW_REV, &rev, 1);
> > -   if (len < 0)
> > -   return;
> > -
> > -   DRM_DEBUG_KMS("sink hw revision: %d.%d\n", (rev & 0xf0) >> 4, rev & 
> > 0xf);
> > +   return intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
> > +      DP_DWN_STRM_PORT_PRESENT;
> >  }
> >  
> > -static void intel_dp_print_sw_revision(struct intel_dp *intel_dp)
> > +bool
> > +intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc)
> >  {
> > -   uint8_t rev[2];
> > -   int len;
> > +   u32 base = intel_dp_is_branch(intel_dp) ? DP_BRANCH_OUI : DP_SINK_OUI;
> >  
> > -   if ((drm_debug & DRM_UT_KMS) == 0)
> > -   return;
> > +   return drm_dp_dpcd_read(&intel_dp->aux, base, desc, sizeof(*desc)) ==
> > +      sizeof(*desc);
> 
> Starting to read either branch or sink oui should be a standalone prep
> change. I guess this should be done, although I've seen crappy devices
> that report oui in wrong place...

Ok, can make that a separate change.

> 
> > +}
> >  
> > -   if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
> > -     DP_DWN_STRM_PORT_PRESENT))
> > -   return;
> > +void
> > +intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc)
> > +{
> > +   const char *dev_type = intel_dp_is_branch(intel_dp) ? "branch" : "sink";
> > +   bool oui_sup = intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] &
> > +      DP_OUI_SUPPORT;
> >  
> > -   len = drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_SW_REV, &rev, 2);
> > -   if (len < 0)
> > -   return;
> > -
> > -   DRM_DEBUG_KMS("sink sw revision: %d.%d\n", rev[0], rev[1]);
> > +   DRM_DEBUG_KMS("DP %s: OUI %*phD%s dev-ID %.*s HW-rev %d.%d SW-rev 
> > %d.%d\n",
> > +     dev_type,
> > +     (int)sizeof(desc->oui), desc->oui, oui_sup ? "" : "(NS)",
> > +     (int)sizeof(desc->device_id), desc->device_id,
> > +     (desc->hw_rev & 0xf0) >> 4, (desc->hw_rev & 0xf),
> > +     desc->sw_major_rev, desc->sw_minor_rev);
> 
> I've been thinking about starting to print this stuff too. But again,
> could be a standalone change.
> 
> >  }
> >  
> >  static int rate_to_index(int find, const int *rates)
> > @@ -3519,6 +3511,13 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
> >     if (!intel_dp_read_dpcd(intel_dp))
> >     return false;
> >  
> > +   if (drm_debug & DRM_UT_KMS) {
> > +   struct intel_dp_desc desc;
> > +
> > +   if (intel_dp_read_desc(intel_dp, &desc))
> > +   intel_dp_print_desc(intel_dp, &desc);
> > +   }
> 
> I *really* don't think we should do dpcd access conditional to drm
> debugs. I smell heisenbugs just thinking about it.

It's already conditional,
see intel_dp_print_hw_revision(), intel_dp_print_sw_revision(). But I
can make it unconditional.

> 
> > +
> >     if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
> >     dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
> >     DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
> > @@ -3621,23 +3620,6 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
> >     return true;
> >  }
> >  
> > -static void
> > -intel_dp_probe_oui(struct intel_dp *intel_dp)
> > -{
> > -   u8 buf[3];
> > -
> > -   if (!(intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] & DP_OUI_SUPPORT))
> > -   return;
> > -
> > -   if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_OUI, buf, 3) == 3)
> > -   DRM_DEBUG_KMS("Sink OUI: %02hx%02hx%02hx\n",
> > -     buf[0], buf[1], buf[2]);
> > -
> > -   if (drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_OUI, buf, 3) == 3)
> > -   DRM_DEBUG_KMS("Branch OUI: %02hx%02hx%02hx\n",
> 

Re: [Intel-gfx] linux-next: Tree for Oct 20 (gpu/drm/i915)

2016-10-20 Thread Jani Nikula
On Thu, 20 Oct 2016, Daniel Vetter  wrote:
> On Thu, Oct 20, 2016 at 7:37 PM, Randy Dunlap  wrote:
>> On 10/19/16 20:20, Stephen Rothwell wrote:
>>> Hi all,
>>>
>>> Changes since 20161019:
>>>
>>
>> on i386: when CONFIG_ACPI is not enabled:
>
> Adding Zhenyu. Might be good to have a fix just for this that I
> directly pick up, since I want to tag the first 4.10 pull for Dave
> Airlie this w/e.

How about just this?

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 6aedc96aa412..94914381e8ef 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -85,7 +85,7 @@ config DRM_I915_USERPTR
 
 config DRM_I915_GVT
 bool "Enable Intel GVT-g graphics virtualization host support"
-depends on DRM_I915
+depends on DRM_I915 && ACPI
 default n
 help
  Choose this option if you want to enable Intel GVT-g graphics



> -Daniel
>
>> ../drivers/gpu/drm/i915/gvt/opregion.c: In function 
>> 'intel_gvt_init_opregion':
>> ../drivers/gpu/drm/i915/gvt/opregion.c:183:2: error: implicit declaration of 
>> function 'acpi_os_ioremap' [-Werror=implicit-function-declaration]
>>   gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa,
>>   ^
>> ../drivers/gpu/drm/i915/gvt/opregion.c:183:28: warning: assignment makes 
>> pointer from integer without a cast [enabled by default]
>>   gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa,
>> ^
>> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'read_pte64':
>> ../drivers/gpu/drm/i915/gvt/gtt.c:277:2: warning: left shift count >= width 
>> of type [enabled by default]
>>   pte |= ioread32(addr + 4) << 32;
>>   ^
>> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gtt_get_pfn':
>> ../drivers/gpu/drm/i915/gvt/gtt.c:360:3: warning: left shift count >= width 
>> of type [enabled by default]
>>pfn = (e->val64 & ADDR_4K_MASK) >> 12;
>>^
>> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gtt_set_pfn':
>> ../drivers/gpu/drm/i915/gvt/gtt.c:373:3: warning: left shift count >= width 
>> of type [enabled by default]
>>e->val64 &= ~ADDR_4K_MASK;
>>^
>> ../drivers/gpu/drm/i915/gvt/gtt.c:374:3: warning: left shift count >= width 
>> of type [enabled by default]
>>pfn &= (ADDR_4K_MASK >> 12);
>>^
>> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gma_to_pml4_index':
>> ../drivers/gpu/drm/i915/gvt/gtt.c:436:1: warning: right shift count >= width 
>> of type [enabled by default]
>>  DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
>>  ^
>>   CC  drivers/gpu/drm/radeon/si_smc.o
>> In file included from ../drivers/gpu/drm/i915/i915_drv.h:46:0,
>>  from ../drivers/gpu/drm/i915/gvt/gtt.c:36:
>> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 
>> 'intel_gvt_create_scratch_page':
>> ../drivers/gpu/drm/i915/gvt/gtt.c:1945:47: warning: cast from pointer to 
>> integer of different size [-Wpointer-to-int-cast]
>>gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
>>^
>> ../include/drm/drmP.h:201:43: note: in definition of macro 'DRM_ERROR'
>>   drm_printk(KERN_ERR, DRM_UT_NONE, fmt, ##__VA_ARGS__)
>>^
>> ../drivers/gpu/drm/i915/gvt/gtt.c:1945:3: note: in expansion of macro 
>> 'gvt_err'
>>gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
>>^
>>
>>
>>
>> --
>> ~Randy
>> ___
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Jani Nikula, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 7/8] drm/i915/gen9+: Program watermarks as a separate step during evasion

2016-10-20 Thread Matt Roper
On Wed, Oct 12, 2016 at 03:28:20PM +0200, Maarten Lankhorst wrote:
> Instead of running the watermark updates from the callbacks run
> them from a separate hook atomic_evade_watermarks.
> 
> This also gets rid of the global skl_results, which was required for
> keeping track of the current atomic commit.
> 
> Signed-off-by: Maarten Lankhorst 
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  7 ---
>  drivers/gpu/drm/i915/intel_display.c | 36 +-
>  drivers/gpu/drm/i915/intel_drv.h |  7 ---
>  drivers/gpu/drm/i915/intel_pm.c  | 38 
> ++--
>  drivers/gpu/drm/i915/intel_sprite.c  | 18 -
>  5 files changed, 28 insertions(+), 78 deletions(-)
> 
...
> @@ -14436,8 +14413,13 @@ static void intel_atomic_commit_tail(struct 
> drm_atomic_state *state)
>   intel_check_cpu_fifo_underruns(dev_priv);
>   intel_check_pch_fifo_underruns(dev_priv);
>  
> - if (!crtc->state->active)
> - intel_update_watermarks(crtc);
> + if (!crtc->state->active) {
> + if (dev_priv->display.initial_watermarks)
> + 
> dev_priv->display.initial_watermarks(intel_state,
> +  
> to_intel_crtc_state(crtc->state));
> + else
> + intel_update_watermarks(crtc);
> + }
>   }

This will change the behavior on ILK-style platforms won't it?
Previously the intel_update_watermarks here was a noop on those
platforms, but now we're calling initial_watermarks after the CRTC is
disabled there (note that there's also a call to it in pre_plane_update
that we purposely skip when doing any kind of modeset).


Matt

>   }
>  
> @@ -14599,7 +14581,6 @@ static int intel_atomic_commit(struct drm_device *dev,
>  
>   drm_atomic_helper_swap_state(state, true);
>   dev_priv->wm.distrust_bios_wm = false;
> - dev_priv->wm.skl_results = intel_state->wm_results;
>   intel_shared_dpll_commit(state);
>   intel_atomic_track_fbs(state);
>  
> @@ -14913,7 +14894,7 @@ static void intel_begin_crtc_commit(struct drm_crtc 
> *crtc,
>   intel_pipe_update_start(intel_crtc);
>  
>   if (modeset)
> - return;
> + goto out;
>  
>   if (crtc->state->color_mgmt_changed || 
> to_intel_crtc_state(crtc->state)->update_pipe) {
>   intel_color_set_csc(crtc->state);
> @@ -14925,6 +14906,7 @@ static void intel_begin_crtc_commit(struct drm_crtc 
> *crtc,
>   else if (INTEL_GEN(dev_priv) >= 9)
>   skl_detach_scalers(intel_crtc);
>  
> +out:
>   if (dev_priv->display.atomic_evade_watermarks)
>   
> dev_priv->display.atomic_evade_watermarks(to_intel_atomic_state(old_crtc_state->state),
>  intel_cstate);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h 
> b/drivers/gpu/drm/i915/intel_drv.h
> index 9f04e26c4365..17cf1ee83bfb 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1761,13 +1761,6 @@ bool skl_ddb_allocation_equals(const struct 
> skl_ddb_allocation *old,
>  enum pipe pipe);
>  bool skl_ddb_allocation_overlaps(struct drm_atomic_state *state,
>struct intel_crtc *intel_crtc);
> -void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
> -  const struct skl_plane_wm *wm,
> -  const struct skl_ddb_allocation *ddb);
> -void skl_write_plane_wm(struct intel_crtc *intel_crtc,
> - const struct skl_plane_wm *wm,
> - const struct skl_ddb_allocation *ddb,
> - int plane);
>  uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config);
>  bool ilk_disable_lp_wm(struct drm_device *dev);
>  int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index be3dd8cdc7ae..18c62d1eea19 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -4179,27 +4179,35 @@ skl_compute_wm(struct drm_atomic_state *state)
>   return 0;
>  }
>  
> -static void skl_evade_crtc_wm(struct intel_atomic_state *state,
> -   struct intel_crtc_state *cstate)
> +static void skl_evade_crtc_wm(struct intel_atomic_state *state, struct 
> intel_crtc_state *cstate)
>  {
>   struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
>   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
>   struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
> + const struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
>   enum pipe pipe = crtc->pipe;
> + int plane;
> +
> + if 

Re: [Intel-gfx] [PATCH 2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

2016-10-20 Thread Jani Nikula
On Thu, 20 Oct 2016, Imre Deak  wrote:
> On my APL the LSPCON firmware resumes in PCON mode as opposed to the
> expected LS mode. It also appears to be in a state where AUX DPCD reads
> will succeed but return garbage recovering only after a few hundreds of
> milliseconds. After the recovery time DPCD reads will result in the
> correct values and things will continue to work. If I2C over AUX is
> attempted during this recovery time (implying an AUX write transaction)
> the firmware won't recover and will stay in this broken state.
>
> As a workaround check if the firmware is in PCON state after resume and
> if so wait until the correct DPCD values are returned. For this we
> compare the branch descriptor with the one we cached during init time.
> If the firmware was in the LS state, we skip the w/a and continue as
> before.
>
> Cc: Shashank Sharma 
> Cc: Ville Syrjälä 
> Cc: Jani Nikula 
> Signed-off-by: Imre Deak 
> ---
>  drivers/gpu/drm/i915/intel_dp.c |  2 +-
>  drivers/gpu/drm/i915/intel_drv.h|  6 -
>  drivers/gpu/drm/i915/intel_lspcon.c | 52 
> ++---
>  3 files changed, 48 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> index e90211e..ec031db 100644
> --- a/drivers/gpu/drm/i915/intel_dp.c
> +++ b/drivers/gpu/drm/i915/intel_dp.c
> @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
>   intel_dp->DP = DP;
>  }
>  
> -static bool
> +bool
>  intel_dp_read_dpcd(struct intel_dp *intel_dp)
>  {
>   if (drm_dp_dpcd_read(_dp->aux, 0x000, intel_dp->dpcd,
> diff --git a/drivers/gpu/drm/i915/intel_drv.h 
> b/drivers/gpu/drm/i915/intel_drv.h
> index a35e241..9a2366e 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -972,7 +972,9 @@ struct intel_dp {
>  struct intel_lspcon {
>   bool active;
>   enum drm_lspcon_mode mode;
> - struct drm_dp_aux *aux;
> + struct intel_dp *intel_dp;
> + bool desc_valid;
> + struct intel_dp_desc desc;

I guess we could cache the desc in intel_dp directly. Independent of
this patch.

Also, I'm wondering if we could stick with just aux here, and read
something else from dpcd instead.

>  };
>  
>  struct intel_digital_port {
> @@ -1469,6 +1471,8 @@ static inline unsigned int 
> intel_dp_unused_lane_mask(int lane_count)
>  }
>  
>  bool
> +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> +bool
>  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>  void
>  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> diff --git a/drivers/gpu/drm/i915/intel_lspcon.c 
> b/drivers/gpu/drm/i915/intel_lspcon.c
> index d2c8cb2..54c6173 100644
> --- a/drivers/gpu/drm/i915/intel_lspcon.c
> +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> @@ -30,7 +30,7 @@
>  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon 
> *lspcon)
>  {
>   enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
> - struct i2c_adapter *adapter = >aux->ddc;
> + struct i2c_adapter *adapter = >intel_dp->aux.ddc;
>  
>   if (drm_lspcon_get_mode(adapter, _mode))
>   DRM_ERROR("Error reading LSPCON mode\n");
> @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>  {
>   int err;
>   enum drm_lspcon_mode current_mode;
> - struct i2c_adapter *adapter = >aux->ddc;
> + struct i2c_adapter *adapter = >intel_dp->aux.ddc;
>  
>   err = drm_lspcon_get_mode(adapter, _mode);
>   if (err) {
> @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>  static bool lspcon_probe(struct intel_lspcon *lspcon)
>  {
>   enum drm_dp_dual_mode_type adaptor_type;
> - struct i2c_adapter *adapter = >aux->ddc;
> + struct i2c_adapter *adapter = >intel_dp->aux.ddc;
>  
>   /* Lets probe the adaptor and check its type */
>   adaptor_type = drm_dp_dual_mode_detect(adapter);
> @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
>   return true;
>  }
>  
> +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
> +{
> + unsigned long start = jiffies;
> +
> + if (!lspcon->desc_valid)
> + return;
> +
> + while (1) {
> + struct intel_dp_desc desc;
> +
> + /*
> +  * The w/a only applies in PCON mode and we don't expect any
> +  * AUX errors.
> +  */
> + if (!intel_dp_read_desc(lspcon->intel_dp, ))
> + return;
> +
> + if (!memcmp(>desc, , sizeof(desc))) {
> + DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u 
> ms\n",
> +   jiffies_to_msecs(jiffies - start));
> + return;
> + }
> +
> + if (time_after(jiffies, 

Re: [Intel-gfx] [PATCH 1/2] drm/i915/dp: Print full branch/sink descriptor for all outputs

2016-10-20 Thread Jani Nikula
On Thu, 20 Oct 2016, Imre Deak  wrote:
> Extend the branch/sink descriptor info with the missing device ID
> field and print this info for eDP and LSPCON connectors too.
>
> Signed-off-by: Imre Deak 
> ---
>  drivers/gpu/drm/i915/intel_dp.c | 83 
> +++--
>  drivers/gpu/drm/i915/intel_drv.h| 13 ++
>  drivers/gpu/drm/i915/intel_lspcon.c |  7 
>  3 files changed, 53 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> index 88f3b74..e90211e 100644
> --- a/drivers/gpu/drm/i915/intel_dp.c
> +++ b/drivers/gpu/drm/i915/intel_dp.c
> @@ -1442,42 +1442,34 @@ static void intel_dp_print_rates(struct intel_dp 
> *intel_dp)
>   DRM_DEBUG_KMS("common rates: %s\n", str);
>  }
>  
> -static void intel_dp_print_hw_revision(struct intel_dp *intel_dp)
> +static bool intel_dp_is_branch(struct intel_dp *intel_dp)

Belongs in drm dp helpers.

>  {
> - uint8_t rev;
> - int len;
> -
> - if ((drm_debug & DRM_UT_KMS) == 0)
> - return;
> -
> - if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
> -   DP_DWN_STRM_PORT_PRESENT))
> - return;
> -
> - len = drm_dp_dpcd_read(_dp->aux, DP_BRANCH_HW_REV, , 1);
> - if (len < 0)
> - return;
> -
> - DRM_DEBUG_KMS("sink hw revision: %d.%d\n", (rev & 0xf0) >> 4, rev & 
> 0xf);
> + return intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
> +DP_DWN_STRM_PORT_PRESENT;
>  }
>  
> -static void intel_dp_print_sw_revision(struct intel_dp *intel_dp)
> +bool
> +intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc)
>  {
> - uint8_t rev[2];
> - int len;
> + u32 base = intel_dp_is_branch(intel_dp) ? DP_BRANCH_OUI : DP_SINK_OUI;
>  
> - if ((drm_debug & DRM_UT_KMS) == 0)
> - return;
> + return drm_dp_dpcd_read(_dp->aux, base, desc, sizeof(*desc)) ==
> +sizeof(*desc);

Starting to read either the branch or sink OUI should be a standalone prep
change. I guess this should be done, although I've seen crappy devices
that report the OUI in the wrong place...

> +}
>  
> - if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
> -   DP_DWN_STRM_PORT_PRESENT))
> - return;
> +void
> +intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc)
> +{
> + const char *dev_type = intel_dp_is_branch(intel_dp) ? "branch" : "sink";
> + bool oui_sup = intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] &
> +DP_OUI_SUPPORT;
>  
> - len = drm_dp_dpcd_read(_dp->aux, DP_BRANCH_SW_REV, , 2);
> - if (len < 0)
> - return;
> -
> - DRM_DEBUG_KMS("sink sw revision: %d.%d\n", rev[0], rev[1]);
> + DRM_DEBUG_KMS("DP %s: OUI %*phD%s dev-ID %.*s HW-rev %d.%d SW-rev 
> %d.%d\n",
> +   dev_type,
> +   (int)sizeof(desc->oui), desc->oui, oui_sup ? "" : "(NS)",
> +   (int)sizeof(desc->device_id), desc->device_id,
> +   (desc->hw_rev & 0xf0) >> 4, (desc->hw_rev & 0xf),
> +   desc->sw_major_rev, desc->sw_minor_rev);

I've been thinking about starting to print this stuff too. But again,
could be a standalone change.

>  }
>  
>  static int rate_to_index(int find, const int *rates)
> @@ -3519,6 +3511,13 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
>   if (!intel_dp_read_dpcd(intel_dp))
>   return false;
>  
> + if (drm_debug & DRM_UT_KMS) {
> + struct intel_dp_desc desc;
> +
> + if (intel_dp_read_desc(intel_dp, ))
> + intel_dp_print_desc(intel_dp, );
> + }

I *really* don't think we should make dpcd access conditional on drm
debugs. I smell heisenbugs just thinking about it.

> +
>   if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
>   dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
>   DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
> @@ -3621,23 +3620,6 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
>   return true;
>  }
>  
> -static void
> -intel_dp_probe_oui(struct intel_dp *intel_dp)
> -{
> - u8 buf[3];
> -
> - if (!(intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] & DP_OUI_SUPPORT))
> - return;
> -
> - if (drm_dp_dpcd_read(_dp->aux, DP_SINK_OUI, buf, 3) == 3)
> - DRM_DEBUG_KMS("Sink OUI: %02hx%02hx%02hx\n",
> -   buf[0], buf[1], buf[2]);
> -
> - if (drm_dp_dpcd_read(_dp->aux, DP_BRANCH_OUI, buf, 3) == 3)
> - DRM_DEBUG_KMS("Branch OUI: %02hx%02hx%02hx\n",
> -   buf[0], buf[1], buf[2]);
> -}
> -
>  static bool
>  intel_dp_can_mst(struct intel_dp *intel_dp)
>  {
> @@ -4410,11 +4392,12 @@ intel_dp_long_pulse(struct intel_connector 
> *intel_connector)
> yesno(drm_dp_tps3_supported(intel_dp->dpcd)));
>  
>   intel_dp_print_rates(intel_dp);
> + if (drm_debug & DRM_UT_KMS) {
> +

Re: [Intel-gfx] [PATCH 6/8] drm/i915/gen9+: Use the watermarks from crtc_state for everything.

2016-10-20 Thread Paulo Zanoni
Em Qua, 2016-10-12 às 15:28 +0200, Maarten Lankhorst escreveu:
> There's no need to keep a duplicate skl_pipe_wm around any more,
> everything can be discovered from crtc_state, which we pass around
> correctly now even in case of plane disable.
> 
> Signed-off-by: Maarten Lankhorst 
> ---
>  drivers/gpu/drm/i915/intel_display.c |  2 +-
>  drivers/gpu/drm/i915/intel_drv.h |  1 -
>  drivers/gpu/drm/i915/intel_pm.c  | 11 +--
>  3 files changed, 6 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c
> b/drivers/gpu/drm/i915/intel_display.c
> index 23d8c72dade3..340861826c46 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -13455,7 +13455,7 @@ static void verify_wm_state(struct drm_crtc
> *crtc,
>   return;
>  
>   skl_pipe_wm_get_hw_state(crtc, _wm);
> - sw_wm = _crtc->wm.active.skl;
> + sw_wm = _intel_crtc_state(new_state)->wm.skl.optimal;
>  
>   skl_ddb_get_hw_state(dev_priv, _ddb);
>   sw_ddb = _priv->wm.skl_hw.ddb;
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index a176e6cebab3..9f04e26c4365 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -723,7 +723,6 @@ struct intel_crtc {
>   /* watermarks currently being used  */
>   union {
>   struct intel_pipe_wm ilk;
> - struct skl_pipe_wm skl;
>   } active;
>  
>   /* allow CxSR on this pipe */
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 05ccd253fd7a..be3dd8cdc7ae 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3904,9 +3904,9 @@ bool skl_ddb_allocation_overlaps(struct
> drm_atomic_state *state,
>  static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
>     struct skl_ddb_allocation *ddb, /* out
> */
>     struct skl_pipe_wm *pipe_wm, /* out */
> +   const struct skl_pipe_wm *old_pipe_wm,
>     bool *changed /* out */)

Bikeshed: this patch adds an "in" parameter in the middle of the "out"
parameters. That's kinda ugly IMHO.

With that maybe fixed:
Reviewed-by: Paulo Zanoni 


>  {
> - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->crtc);
>   struct intel_crtc_state *intel_cstate =
> to_intel_crtc_state(cstate);
>   int ret;
>  
> @@ -3914,7 +3914,7 @@ static int skl_update_pipe_wm(struct
> drm_crtc_state *cstate,
>   if (ret)
>   return ret;
>  
> - if (!memcmp(_crtc->wm.active.skl, pipe_wm,
> sizeof(*pipe_wm)))
> + if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
>   *changed = false;
>   else
>   *changed = true;
> @@ -4155,10 +4155,12 @@ skl_compute_wm(struct drm_atomic_state
> *state)
>   for_each_crtc_in_state(state, crtc, cstate, i) {
>   struct intel_crtc_state *intel_cstate =
>   to_intel_crtc_state(cstate);
> + const struct skl_pipe_wm *old_pipe_wm =
> + _intel_crtc_state(crtc->state)-
> >wm.skl.optimal;
>  
>   pipe_wm = _cstate->wm.skl.optimal;
>   ret = skl_update_pipe_wm(cstate, >ddb,
> pipe_wm,
> -  );
> +  old_pipe_wm, );
>   if (ret)
>   return ret;
>  
> @@ -4203,8 +4205,6 @@ static void skl_update_wm(struct drm_crtc
> *crtc)
>   if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
>   return;
>  
> - intel_crtc->wm.active.skl = *pipe_wm;
> -
>   mutex_lock(_priv->wm.wm_mutex);
>  
>   /*
> @@ -4371,7 +4371,6 @@ void skl_wm_get_hw_state(struct drm_device
> *dev)
>   cstate = to_intel_crtc_state(crtc->state);
>  
>   skl_pipe_wm_get_hw_state(crtc, 
> >wm.skl.optimal);
> - intel_crtc->wm.active.skl = cstate->wm.skl.optimal;
>  
>   if (!intel_crtc->active)
>   hw->dirty_pipes |= drm_crtc_mask(crtc);
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 6/8] drm/i915/gen9+: Use the watermarks from crtc_state for everything.

2016-10-20 Thread Matt Roper
On Wed, Oct 12, 2016 at 03:28:19PM +0200, Maarten Lankhorst wrote:
> There's no need to keep a duplicate skl_pipe_wm around any more,
> everything can be discovered from crtc_state, which we pass around
> correctly now even in case of plane disable.

You might want to add some clarification that
intel(crtc->state)->wm.skl.optimal and intel_crtc->wm.active always hold
the same value by the time we finally drop our CRTC locks, so there's no
need for the duplication on gen9.  The reason we have
intel_crtc->wm.active in general is because the two-step platforms
(ILK-style, VLV-style) need cross-CRTC information during watermark
updates (which are potentially racing if multiple CRTC's are updated
independently but simultaneously) and the intel_crtc data is protected
by a separate wm_mutex.  Watermark calculations triggered by one CRTC's
update need to know which values are actually active on the hardware
(old, intermediate, or final) while another CRTC update is still being
processed.  The types of CRTC updates that are allowed to race on gen9
don't have the same kind of inter-CRTC data dependency.

I think this patch needs some slight rebasing to apply cleanly on top of
Lyude's changes that landed yesterday, but if you expand the commit
message justification a bit,

Reviewed-by: Matt Roper 


Matt

> 
> Signed-off-by: Maarten Lankhorst 
> ---
>  drivers/gpu/drm/i915/intel_display.c |  2 +-
>  drivers/gpu/drm/i915/intel_drv.h |  1 -
>  drivers/gpu/drm/i915/intel_pm.c  | 11 +--
>  3 files changed, 6 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c 
> b/drivers/gpu/drm/i915/intel_display.c
> index 23d8c72dade3..340861826c46 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -13455,7 +13455,7 @@ static void verify_wm_state(struct drm_crtc *crtc,
>   return;
>  
>   skl_pipe_wm_get_hw_state(crtc, _wm);
> - sw_wm = _crtc->wm.active.skl;
> + sw_wm = _intel_crtc_state(new_state)->wm.skl.optimal;
>  
>   skl_ddb_get_hw_state(dev_priv, _ddb);
>   sw_ddb = _priv->wm.skl_hw.ddb;
> diff --git a/drivers/gpu/drm/i915/intel_drv.h 
> b/drivers/gpu/drm/i915/intel_drv.h
> index a176e6cebab3..9f04e26c4365 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -723,7 +723,6 @@ struct intel_crtc {
>   /* watermarks currently being used  */
>   union {
>   struct intel_pipe_wm ilk;
> - struct skl_pipe_wm skl;
>   } active;
>  
>   /* allow CxSR on this pipe */
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 05ccd253fd7a..be3dd8cdc7ae 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3904,9 +3904,9 @@ bool skl_ddb_allocation_overlaps(struct 
> drm_atomic_state *state,
>  static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
> struct skl_ddb_allocation *ddb, /* out */
> struct skl_pipe_wm *pipe_wm, /* out */
> +   const struct skl_pipe_wm *old_pipe_wm,
> bool *changed /* out */)
>  {
> - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->crtc);
>   struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate);
>   int ret;
>  
> @@ -3914,7 +3914,7 @@ static int skl_update_pipe_wm(struct drm_crtc_state 
> *cstate,
>   if (ret)
>   return ret;
>  
> - if (!memcmp(_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm)))
> + if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
>   *changed = false;
>   else
>   *changed = true;
> @@ -4155,10 +4155,12 @@ skl_compute_wm(struct drm_atomic_state *state)
>   for_each_crtc_in_state(state, crtc, cstate, i) {
>   struct intel_crtc_state *intel_cstate =
>   to_intel_crtc_state(cstate);
> + const struct skl_pipe_wm *old_pipe_wm =
> + _intel_crtc_state(crtc->state)->wm.skl.optimal;
>  
>   pipe_wm = _cstate->wm.skl.optimal;
>   ret = skl_update_pipe_wm(cstate, >ddb, pipe_wm,
> -  );
> +  old_pipe_wm, );
>   if (ret)
>   return ret;
>  
> @@ -4203,8 +4205,6 @@ static void skl_update_wm(struct drm_crtc *crtc)
>   if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
>   return;
>  
> - intel_crtc->wm.active.skl = *pipe_wm;
> -
>   mutex_lock(_priv->wm.wm_mutex);
>  
>   /*
> @@ -4371,7 +4371,6 @@ void skl_wm_get_hw_state(struct drm_device *dev)
>   cstate = to_intel_crtc_state(crtc->state);
>  
>   skl_pipe_wm_get_hw_state(crtc, >wm.skl.optimal);
> - 

Re: [Intel-gfx] [PATCH 5/8] drm/i915: Add a atomic evasion step to watermark programming.

2016-10-20 Thread Paulo Zanoni
Em Qua, 2016-10-12 às 15:28 +0200, Maarten Lankhorst escreveu:
> Allow the driver to write watermarks during atomic evasion.
> This will make it possible to write the watermarks in a cleaner
> way on gen9+.
> 
> Signed-off-by: Maarten Lankhorst 
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  6 --
>  drivers/gpu/drm/i915/intel_display.c | 18 --
>  drivers/gpu/drm/i915/intel_pm.c  | 19 +--
>  3 files changed, 29 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index f65ccf9b0bea..09588c58148f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -484,6 +484,7 @@ struct sdvo_device_mapping {
>  
>  struct intel_connector;
>  struct intel_encoder;
> +struct intel_atomic_state;
>  struct intel_crtc_state;
>  struct intel_initial_plane_config;
>  struct intel_crtc;
> @@ -497,8 +498,9 @@ struct drm_i915_display_funcs {
>   int (*compute_intermediate_wm)(struct drm_device *dev,
>      struct intel_crtc
> *intel_crtc,
>      struct intel_crtc_state
> *newstate);
> - void (*initial_watermarks)(struct intel_crtc_state *cstate);
> - void (*optimize_watermarks)(struct intel_crtc_state
> *cstate);
> + void (*initial_watermarks)(struct intel_atomic_state *state,
> struct intel_crtc_state *cstate);
> + void (*atomic_evade_watermarks)(struct intel_atomic_state
> *state, struct intel_crtc_state *cstate);
> + void (*optimize_watermarks)(struct intel_atomic_state
> *state, struct intel_crtc_state *cstate);

Can't we just get intel_atomic_state from intel_crtc_state?  Why pass
both?


>   int (*compute_global_watermarks)(struct drm_atomic_state
> *state);
>   void (*update_wm)(struct drm_crtc *crtc);
>   int (*modeset_calc_cdclk)(struct drm_atomic_state *state);
> diff --git a/drivers/gpu/drm/i915/intel_display.c
> b/drivers/gpu/drm/i915/intel_display.c
> index 55f8ec8c76ae..23d8c72dade3 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -5160,7 +5160,7 @@ static void intel_pre_plane_update(struct
> intel_crtc_state *old_crtc_state)
>    * us to.
>    */
>   if (dev_priv->display.initial_watermarks != NULL)
> - dev_priv->display.initial_watermarks(pipe_config);
> + dev_priv-
> >display.initial_watermarks(to_intel_atomic_state(old_state),
> pipe_config);
>   else if (pipe_config->update_wm_pre)
>   intel_update_watermarks(>base);
>  }
> @@ -5374,7 +5374,7 @@ static void ironlake_crtc_enable(struct
> intel_crtc_state *pipe_config,
>   intel_color_load_luts(_config->base);
>  
>   if (dev_priv->display.initial_watermarks != NULL)
> - dev_priv->display.initial_watermarks(intel_crtc-
> >config);
> + dev_priv-
> >display.initial_watermarks(to_intel_atomic_state(old_state),
> intel_crtc->config);
>   intel_enable_pipe(intel_crtc);
>  
>   if (intel_crtc->config->has_pch_encoder)
> @@ -5480,7 +5480,7 @@ static void haswell_crtc_enable(struct
> intel_crtc_state *pipe_config,
>   intel_ddi_enable_transcoder_func(crtc);
>  
>   if (dev_priv->display.initial_watermarks != NULL)
> - dev_priv->display.initial_watermarks(pipe_config);
> + dev_priv-
> >display.initial_watermarks(to_intel_atomic_state(old_state),
> pipe_config);
>   else
>   intel_update_watermarks(crtc);
>  
> @@ -14503,7 +14503,7 @@ static void intel_atomic_commit_tail(struct
> drm_atomic_state *state)
>   intel_cstate = to_intel_crtc_state(crtc->state);
>  
>   if (dev_priv->display.optimize_watermarks)
> - dev_priv-
> >display.optimize_watermarks(intel_cstate);
> + dev_priv-
> >display.optimize_watermarks(intel_state, intel_cstate);
>   }
>  
>   for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
> @@ -14908,7 +14908,6 @@ static void intel_begin_crtc_commit(struct
> drm_crtc *crtc,
>   struct intel_crtc_state *old_intel_state =
>   to_intel_crtc_state(old_crtc_state);
>   bool modeset = needs_modeset(crtc->state);
> - enum pipe pipe = intel_crtc->pipe;
>  
>   /* Perform vblank evasion around commit operation */
>   intel_pipe_update_start(intel_crtc);
> @@ -14923,12 +14922,11 @@ static void intel_begin_crtc_commit(struct
> drm_crtc *crtc,
>  
>   if (intel_cstate->update_pipe)
>   intel_update_pipe_config(intel_crtc,
> old_intel_state);
> - else if (INTEL_GEN(dev_priv) >= 9) {
> + else if (INTEL_GEN(dev_priv) >= 9)
>   skl_detach_scalers(intel_crtc);
>  
> - I915_WRITE(PIPE_WM_LINETIME(pipe),
> -    intel_cstate->wm.skl.optimal.linetime);
> - }
> + if (dev_priv->display.atomic_evade_watermarks)
> + 

Re: [Intel-gfx] [PATCH 4/8] drm/i915/skl+: Clean up minimum allocations.

2016-10-20 Thread Paulo Zanoni
Em Qua, 2016-10-12 às 15:28 +0200, Maarten Lankhorst escreveu:
> Move calculating minimum allocations to a helper, which cleans up the
> code some more. The cursor is still allocated in advance because it
> doesn't count towards data rate and should always be reserved.
> 
> Signed-off-by: Maarten Lankhorst 
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 66 ---
> --
>  1 file changed, 39 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 83c1b0acef38..45fb8275abea 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3342,6 +3342,32 @@ skl_ddb_min_alloc(const struct drm_plane_state
> *pstate,
>   return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) *
> min_scanlines/4 + 3;
>  }
>  
> +static void
> +skl_ddb_calc_min(const struct intel_crtc_state *cstate, int
> num_active,
> +  uint16_t *minimum, uint16_t *y_minimum)
> +{
> + const struct drm_plane_state *pstate;
> + struct drm_plane *plane;
> + enum pipe pipe = to_intel_crtc(cstate->base.crtc)->pipe;
> +
> + drm_atomic_crtc_state_for_each_plane_state(plane, pstate,
> >base) {
> + struct intel_plane *intel_plane =
> to_intel_plane(plane);
> + int id = skl_wm_plane_id(intel_plane);
> +
> + if (intel_plane->pipe != pipe ||
> + id == PLANE_CURSOR)

You can also remove the check for pipe here.


> + continue;
> +
> + if (!pstate->visible)
> + continue;
> +
> + minimum[id] = skl_ddb_min_alloc(pstate, 0);
> + y_minimum[id] = skl_ddb_min_alloc(pstate, 1);
> + }
> +
> + minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
> +}
> +
>  static int
>  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>     struct skl_ddb_allocation *ddb /* out */)
> @@ -3350,12 +3376,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state
> *cstate,
>   struct drm_crtc *crtc = cstate->base.crtc;
>   struct drm_device *dev = crtc->dev;
>   struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> - struct intel_plane *intel_plane;
> - struct drm_plane *plane;
> - const struct drm_plane_state *pstate;
>   enum pipe pipe = intel_crtc->pipe;
>   struct skl_ddb_entry *alloc = >wm.skl.ddb;
> - uint16_t alloc_size, start, cursor_blocks;
> + uint16_t alloc_size, start;
>   uint16_t minimum[I915_MAX_PLANES] = {};
>   uint16_t y_minimum[I915_MAX_PLANES] = {};
>   unsigned int total_data_rate;
> @@ -3384,35 +3407,21 @@ skl_allocate_pipe_ddb(struct intel_crtc_state
> *cstate,
>   return 0;
>   }
>  
> - cursor_blocks = skl_cursor_allocation(num_active);
> - ddb->plane[pipe][PLANE_CURSOR].start = alloc->end -
> cursor_blocks;
> - ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
> -
> - alloc_size -= cursor_blocks;
> -
> - /* 1. Allocate the mininum required blocks for each active
> plane */
> - drm_atomic_crtc_state_for_each_plane_state(plane, pstate,
> >base) {
> - intel_plane = to_intel_plane(plane);
> - id = skl_wm_plane_id(intel_plane);
> -
> - if (intel_plane->pipe != pipe)
> - continue;
> -
> - if (!pstate->visible)
> - continue;
> + skl_ddb_calc_min(cstate, num_active, minimum, y_minimum);
>  
> - if (plane->type == DRM_PLANE_TYPE_CURSOR)
> - continue;
> -
> - minimum[id] = skl_ddb_min_alloc(pstate, 0);
> - y_minimum[id] = skl_ddb_min_alloc(pstate, 1);
> - }
> + /* 1. Allocate the mininum required blocks for each active
> plane
> +  * and allocate the cursor, it doesn't require extra
> allocation
> +  * proportional to the data rate.
> +  */
>  
> - for (i = 0; i < PLANE_CURSOR; i++) {
> + for (i = 0; i < I915_MAX_PLANES; i++) {

As I mentioned earlier, this is also an unsafe loop. I know you didn't
introduce it, so we can fix this in a follow-up patch.

With the pipe check removed (and Matt's requests addressed):
Reviewed-by: Paulo Zanoni 

>   alloc_size -= minimum[i];
>   alloc_size -= y_minimum[i];
>   }
>  
> + ddb->plane[pipe][PLANE_CURSOR].start = alloc->end -
> minimum[PLANE_CURSOR];
> + ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
> +
>   /*
>    * 2. Distribute the remaining space in proportion to the
> amount of
>    * data each plane needs to fetch from memory.
> @@ -3428,6 +3437,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state
> *cstate,
>   unsigned rate;
>   uint16_t plane_blocks, y_plane_blocks = 0;
>  
> + if (id == PLANE_CURSOR)
> + continue;
> +
>   rate = data_rate[id];
>  
>   /*

Re: [Intel-gfx] linux-next: Tree for Oct 20 (gpu/drm/i915)

2016-10-20 Thread Daniel Vetter
On Thu, Oct 20, 2016 at 7:37 PM, Randy Dunlap  wrote:
> On 10/19/16 20:20, Stephen Rothwell wrote:
>> Hi all,
>>
>> Changes since 20161019:
>>
>
> on i386: when CONFIG_ACPI is not enabled:

Adding Zhenyu. Might be good to have a fix just for this that I
directly pick up, since I want to tag the first 4.10 pull for Dave
Airlie this w/e.
-Daniel

> ../drivers/gpu/drm/i915/gvt/opregion.c: In function 'intel_gvt_init_opregion':
> ../drivers/gpu/drm/i915/gvt/opregion.c:183:2: error: implicit declaration of 
> function 'acpi_os_ioremap' [-Werror=implicit-function-declaration]
>   gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa,
>   ^
> ../drivers/gpu/drm/i915/gvt/opregion.c:183:28: warning: assignment makes 
> pointer from integer without a cast [enabled by default]
>   gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa,
> ^
> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'read_pte64':
> ../drivers/gpu/drm/i915/gvt/gtt.c:277:2: warning: left shift count >= width 
> of type [enabled by default]
>   pte |= ioread32(addr + 4) << 32;
>   ^
> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gtt_get_pfn':
> ../drivers/gpu/drm/i915/gvt/gtt.c:360:3: warning: left shift count >= width 
> of type [enabled by default]
>pfn = (e->val64 & ADDR_4K_MASK) >> 12;
>^
> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gtt_set_pfn':
> ../drivers/gpu/drm/i915/gvt/gtt.c:373:3: warning: left shift count >= width 
> of type [enabled by default]
>e->val64 &= ~ADDR_4K_MASK;
>^
> ../drivers/gpu/drm/i915/gvt/gtt.c:374:3: warning: left shift count >= width 
> of type [enabled by default]
>pfn &= (ADDR_4K_MASK >> 12);
>^
> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gma_to_pml4_index':
> ../drivers/gpu/drm/i915/gvt/gtt.c:436:1: warning: right shift count >= width 
> of type [enabled by default]
>  DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
>  ^
>   CC  drivers/gpu/drm/radeon/si_smc.o
> In file included from ../drivers/gpu/drm/i915/i915_drv.h:46:0,
>  from ../drivers/gpu/drm/i915/gvt/gtt.c:36:
> ../drivers/gpu/drm/i915/gvt/gtt.c: In function 
> 'intel_gvt_create_scratch_page':
> ../drivers/gpu/drm/i915/gvt/gtt.c:1945:47: warning: cast from pointer to 
> integer of different size [-Wpointer-to-int-cast]
>gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
>^
> ../include/drm/drmP.h:201:43: note: in definition of macro 'DRM_ERROR'
>   drm_printk(KERN_ERR, DRM_UT_NONE, fmt, ##__VA_ARGS__)
>^
> ../drivers/gpu/drm/i915/gvt/gtt.c:1945:3: note: in expansion of macro 
> 'gvt_err'
>gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
>^
>
>
>
> --
> ~Randy
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx



-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/dp: Print full branch/sink descriptor for all outputs

2016-10-20 Thread Patchwork
== Series Details ==

Series: series starting with [1/2] drm/i915/dp: Print full branch/sink 
descriptor for all outputs
URL   : https://patchwork.freedesktop.org/series/14123/
State : success

== Summary ==

Series 14123v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/14123/revisions/1/mbox/

Test drv_module_reload_basic:
dmesg-warn -> PASS   (fi-skl-6700hq)

fi-bdw-5557u total:246  pass:231  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:246  pass:204  dwarn:0   dfail:0   fail:0   skip:42 
fi-bxt-t5700 total:246  pass:216  dwarn:0   dfail:0   fail:0   skip:30 
fi-byt-j1900 total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-byt-n2820 total:246  pass:211  dwarn:0   dfail:0   fail:0   skip:35 
fi-hsw-4770  total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-hsw-4770r total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-ilk-650   total:246  pass:185  dwarn:0   dfail:0   fail:1   skip:60 
fi-ivb-3520m total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:25 
fi-ivb-3770  total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:25 
fi-kbl-7200u total:246  pass:222  dwarn:0   dfail:0   fail:0   skip:24 
fi-skl-6260u total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq total:246  pass:223  dwarn:0   dfail:0   fail:0   skip:23 
fi-skl-6700k total:246  pass:221  dwarn:1   dfail:0   fail:0   skip:24 
fi-skl-6770hq total:246  pass:231  dwarn:0   dfail:0   fail:0   skip:15 
fi-snb-2520m total:246  pass:210  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600  total:246  pass:209  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2778/

5113d7495dab3ea4d14a7698368c6be80f6c045c drm-intel-nightly: 
2016y-10m-20d-13h-31m-16s UTC integration manifest
c3e5a2c drm/i915/lspcon: Add workaround for resuming in PCON mode
e8e46ae drm/i915/dp: Print full branch/sink descriptor for all outputs

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] linux-next: Tree for Oct 20 (gpu/drm/i915)

2016-10-20 Thread Randy Dunlap
On 10/19/16 20:20, Stephen Rothwell wrote:
> Hi all,
> 
> Changes since 20161019:
> 

on i386: when CONFIG_ACPI is not enabled:

../drivers/gpu/drm/i915/gvt/opregion.c: In function 'intel_gvt_init_opregion':
../drivers/gpu/drm/i915/gvt/opregion.c:183:2: error: implicit declaration of 
function 'acpi_os_ioremap' [-Werror=implicit-function-declaration]
  gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa,
  ^
../drivers/gpu/drm/i915/gvt/opregion.c:183:28: warning: assignment makes 
pointer from integer without a cast [enabled by default]
  gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa,
^
../drivers/gpu/drm/i915/gvt/gtt.c: In function 'read_pte64':
../drivers/gpu/drm/i915/gvt/gtt.c:277:2: warning: left shift count >= width of 
type [enabled by default]
  pte |= ioread32(addr + 4) << 32;
  ^
../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gtt_get_pfn':
../drivers/gpu/drm/i915/gvt/gtt.c:360:3: warning: left shift count >= width of 
type [enabled by default]
   pfn = (e->val64 & ADDR_4K_MASK) >> 12;
   ^
../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gtt_set_pfn':
../drivers/gpu/drm/i915/gvt/gtt.c:373:3: warning: left shift count >= width of 
type [enabled by default]
   e->val64 &= ~ADDR_4K_MASK;
   ^
../drivers/gpu/drm/i915/gvt/gtt.c:374:3: warning: left shift count >= width of 
type [enabled by default]
   pfn &= (ADDR_4K_MASK >> 12);
   ^
../drivers/gpu/drm/i915/gvt/gtt.c: In function 'gen8_gma_to_pml4_index':
../drivers/gpu/drm/i915/gvt/gtt.c:436:1: warning: right shift count >= width of 
type [enabled by default]
 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
 ^
  CC  drivers/gpu/drm/radeon/si_smc.o
In file included from ../drivers/gpu/drm/i915/i915_drv.h:46:0,
 from ../drivers/gpu/drm/i915/gvt/gtt.c:36:
../drivers/gpu/drm/i915/gvt/gtt.c: In function 'intel_gvt_create_scratch_page':
../drivers/gpu/drm/i915/gvt/gtt.c:1945:47: warning: cast from pointer to 
integer of different size [-Wpointer-to-int-cast]
   gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
   ^
../include/drm/drmP.h:201:43: note: in definition of macro 'DRM_ERROR'
  drm_printk(KERN_ERR, DRM_UT_NONE, fmt, ##__VA_ARGS__)
   ^
../drivers/gpu/drm/i915/gvt/gtt.c:1945:3: note: in expansion of macro 'gvt_err'
   gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
   ^



-- 
~Randy
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/8] drm/i915/skl+: Remove data_rate from watermark struct.

2016-10-20 Thread Paulo Zanoni
Em Qui, 2016-10-20 às 15:18 -0200, Paulo Zanoni escreveu:
> Em Qua, 2016-10-19 às 15:13 -0700, Matt Roper escreveu:
> > 
> > On Wed, Oct 12, 2016 at 03:28:15PM +0200, Maarten Lankhorst wrote:
> > > 
> > > 
> > > It's only used in one function, and can be calculated without
> > > caching it
> > > in the global struct by using
> > > drm_atomic_crtc_state_for_each_plane_state.
> > > 
> > > Signed-off-by: Maarten Lankhorst  > > om
> > > > 
> > > > 
> > > ---
> > >  drivers/gpu/drm/i915/intel_drv.h |  4 
> > >  drivers/gpu/drm/i915/intel_pm.c  | 44 +++---
> > > --
> > > 
> > >  2 files changed, 21 insertions(+), 27 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > b/drivers/gpu/drm/i915/intel_drv.h
> > > index bb468c974e14..888054518f3c 100644
> > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > @@ -502,10 +502,6 @@ struct intel_crtc_wm_state {
> > >   struct skl_pipe_wm optimal;
> > >   struct skl_ddb_entry ddb;
> > >  
> > > - /* cached plane data rate */
> > > - unsigned
> > > plane_data_rate[I915_MAX_PLANES];
> > > - unsigned
> > > plane_y_data_rate[I915_MAX_PLANES];
> > > -
> > >   /* minimum block allocation */
> > >   uint16_t
> > > minimum_blocks[I915_MAX_PLANES];
> > >   uint16_t
> > > minimum_y_blocks[I915_MAX_PLANES];
> > > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > > b/drivers/gpu/drm/i915/intel_pm.c
> > > index b96a899c899d..97b6202c4097 100644
> > > --- a/drivers/gpu/drm/i915/intel_pm.c
> > > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > > @@ -3236,12 +3236,13 @@ skl_plane_relative_data_rate(const struct
> > > intel_crtc_state *cstate,
> > >   *   3 * 4096 * 8192  * 4 < 2^32
> > >   */
> > >  static unsigned int
> > > -skl_get_total_relative_data_rate(struct intel_crtc_state
> > > *intel_cstate)
> > > +skl_get_total_relative_data_rate(struct intel_crtc_state
> > > *intel_cstate,
> > > +  unsigned *plane_data_rate,
> > > +  unsigned *plane_y_data_rate)
> > >  {
> > >   struct drm_crtc_state *cstate = _cstate->base;
> > >   struct drm_atomic_state *state = cstate->state;
> > >   struct drm_crtc *crtc = cstate->crtc;
> > > - struct drm_device *dev = crtc->dev;
> > >   struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> > >   struct drm_plane *plane;
> > >   const struct intel_plane *intel_plane;
> > > @@ -3263,21 +3264,16 @@ skl_get_total_relative_data_rate(struct
> > > intel_crtc_state *intel_cstate)
> > >   /* packed/uv */
> > >   rate =
> > > skl_plane_relative_data_rate(intel_cstate,
> > >   pstate, 0);
> > > - intel_cstate->wm.skl.plane_data_rate[id] = rate;
> > > + plane_data_rate[id] = rate;
> > > +
> > > + total_data_rate += rate;
> > >  
> > >   /* y-plane */
> > >   rate =
> > > skl_plane_relative_data_rate(intel_cstate,
> > >   pstate, 1);
> > > - intel_cstate->wm.skl.plane_y_data_rate[id] =
> > > rate;
> > > - }
> > > -
> > > - /* Calculate CRTC's total data rate from cached values
> > > */
> > > - for_each_intel_plane_on_crtc(dev, intel_crtc,
> > > intel_plane)
> > > {
> > > - int id = skl_wm_plane_id(intel_plane);
> > > + plane_y_data_rate[id] = rate;
> > >  
> > > - /* packed/uv */
> > > - total_data_rate += intel_cstate-
> > > > 
> > > > wm.skl.plane_data_rate[id];
> > > - total_data_rate += intel_cstate-
> > > > 
> > > > wm.skl.plane_y_data_rate[id];
> > > + total_data_rate += rate;
> > >   }
> > >  
> > >   return total_data_rate;
> > > @@ -3366,6 +3362,9 @@ skl_allocate_pipe_ddb(struct
> > > intel_crtc_state
> > > *cstate,
> > >   int num_active;
> > >   int id, i;
> > >  

Also obligatory bikeshed to remove the ugly blank line above :)

> > > + unsigned data_rate[I915_MAX_PLANES] = {};
> > > + unsigned y_data_rate[I915_MAX_PLANES] = {};
> > > +
> > 
> > > Minor nitpick; if you picked different names here (e.g.,
> > plane_data_rate[]) then you could leave the local variables farther
> > down
> > named 'data_rate' and 'y_data_rate' which would reduce the diff
> > changes
> > and result in a slightly smaller patch.
> > 
> > Whether or not you feel like making that change, killing the
> > caching
> > is
> > good so,
> > 
> > Reviewed-by: Matt Roper 
> > 
> > 
> > > 
> > > 
> > >   /* Clear the partitioning for disabled planes. */
> > >   memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
> > >   memset(ddb->y_plane[pipe], 0, sizeof(ddb-
> > > >y_plane[pipe]));
> > > @@ -3425,29 +3424,28 @@ skl_allocate_pipe_ddb(struct
> > > intel_crtc_state *cstate,
> > >    *
> > >    * FIXME: we may not allocate every single block here.
> > >    */
> > > - 

Re: [Intel-gfx] [PATCH 3/8] drm/i915/skl+: Remove minimum block allocation from crtc state.

2016-10-20 Thread Paulo Zanoni
Em Qua, 2016-10-19 às 15:13 -0700, Matt Roper escreveu:
> On Wed, Oct 12, 2016 at 03:28:16PM +0200, Maarten Lankhorst wrote:
> > 
> > This is not required any more now that we get fresh state from
> > drm_atomic_crtc_state_for_each_plane_state. Zero all state
> > in advance.
> > 
> > Signed-off-by: Maarten Lankhorst  > >
> 
> Reviewed-by: Matt Roper 

Reviewed-by: Paulo Zanoni 

You could also get rid of the unsafe loop that computes alloc_size:
just do it in the main loop now that we iterate over everything. But
this can be done in a separate patch.

> 
> > 
> > ---
> >  drivers/gpu/drm/i915/intel_drv.h |  4 
> >  drivers/gpu/drm/i915/intel_pm.c  | 15 +--
> >  2 files changed, 5 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > b/drivers/gpu/drm/i915/intel_drv.h
> > index 888054518f3c..a176e6cebab3 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -501,10 +501,6 @@ struct intel_crtc_wm_state {
> >     /* gen9+ only needs 1-step wm programming
> > */
> >     struct skl_pipe_wm optimal;
> >     struct skl_ddb_entry ddb;
> > -
> > -   /* minimum block allocation */
> > -   uint16_t minimum_blocks[I915_MAX_PLANES];
> > -   uint16_t
> > minimum_y_blocks[I915_MAX_PLANES];
> >     } skl;
> >     };
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > b/drivers/gpu/drm/i915/intel_pm.c
> > index 97b6202c4097..83c1b0acef38 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -3356,8 +3356,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state
> > *cstate,
> >     enum pipe pipe = intel_crtc->pipe;
> >     struct skl_ddb_entry *alloc = >wm.skl.ddb;
> >     uint16_t alloc_size, start, cursor_blocks;
> > -   uint16_t *minimum = cstate->wm.skl.minimum_blocks;
> > -   uint16_t *y_minimum = cstate->wm.skl.minimum_y_blocks;
> > +   uint16_t minimum[I915_MAX_PLANES] = {};
> > +   uint16_t y_minimum[I915_MAX_PLANES] = {};
> >     unsigned int total_data_rate;
> >     int num_active;
> >     int id, i;
> > @@ -3398,16 +3398,11 @@ skl_allocate_pipe_ddb(struct
> > intel_crtc_state *cstate,
> >     if (intel_plane->pipe != pipe)
> >     continue;
> >  
> > -   if (!pstate->visible) {
> > -   minimum[id] = 0;
> > -   y_minimum[id] = 0;
> > +   if (!pstate->visible)
> >     continue;
> > -   }
> > -   if (plane->type == DRM_PLANE_TYPE_CURSOR) {
> > -   minimum[id] = 0;
> > -   y_minimum[id] = 0;
> > +
> > +   if (plane->type == DRM_PLANE_TYPE_CURSOR)
> >     continue;
> > -   }
> >  
> >     minimum[id] = skl_ddb_min_alloc(pstate, 0);
> >     y_minimum[id] = skl_ddb_min_alloc(pstate, 1);
> > -- 
> > 2.7.4
> > 
> > ___
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/8] drm/i915/skl+: Remove data_rate from watermark struct.

2016-10-20 Thread Paulo Zanoni
Em Qua, 2016-10-19 às 15:13 -0700, Matt Roper escreveu:
> On Wed, Oct 12, 2016 at 03:28:15PM +0200, Maarten Lankhorst wrote:
> > 
> > It's only used in one function, and can be calculated without
> > caching it
> > in the global struct by using
> > drm_atomic_crtc_state_for_each_plane_state.
> > 
> > Signed-off-by: Maarten Lankhorst  > >
> > ---
> >  drivers/gpu/drm/i915/intel_drv.h |  4 
> >  drivers/gpu/drm/i915/intel_pm.c  | 44 +++-
> > 
> >  2 files changed, 21 insertions(+), 27 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > b/drivers/gpu/drm/i915/intel_drv.h
> > index bb468c974e14..888054518f3c 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -502,10 +502,6 @@ struct intel_crtc_wm_state {
> >     struct skl_pipe_wm optimal;
> >     struct skl_ddb_entry ddb;
> >  
> > -   /* cached plane data rate */
> > -   unsigned plane_data_rate[I915_MAX_PLANES];
> > -   unsigned
> > plane_y_data_rate[I915_MAX_PLANES];
> > -
> >     /* minimum block allocation */
> >     uint16_t minimum_blocks[I915_MAX_PLANES];
> >     uint16_t
> > minimum_y_blocks[I915_MAX_PLANES];
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > b/drivers/gpu/drm/i915/intel_pm.c
> > index b96a899c899d..97b6202c4097 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -3236,12 +3236,13 @@ skl_plane_relative_data_rate(const struct
> > intel_crtc_state *cstate,
> >   *   3 * 4096 * 8192  * 4 < 2^32
> >   */
> >  static unsigned int
> > -skl_get_total_relative_data_rate(struct intel_crtc_state
> > *intel_cstate)
> > +skl_get_total_relative_data_rate(struct intel_crtc_state
> > *intel_cstate,
> > +    unsigned *plane_data_rate,
> > +    unsigned *plane_y_data_rate)
> >  {
> >     struct drm_crtc_state *cstate = _cstate->base;
> >     struct drm_atomic_state *state = cstate->state;
> >     struct drm_crtc *crtc = cstate->crtc;
> > -   struct drm_device *dev = crtc->dev;
> >     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >     struct drm_plane *plane;
> >     const struct intel_plane *intel_plane;
> > @@ -3263,21 +3264,16 @@ skl_get_total_relative_data_rate(struct
> > intel_crtc_state *intel_cstate)
> >     /* packed/uv */
> >     rate = skl_plane_relative_data_rate(intel_cstate,
> >     pstate, 0);
> > -   intel_cstate->wm.skl.plane_data_rate[id] = rate;
> > +   plane_data_rate[id] = rate;
> > +
> > +   total_data_rate += rate;
> >  
> >     /* y-plane */
> >     rate = skl_plane_relative_data_rate(intel_cstate,
> >     pstate, 1);
> > -   intel_cstate->wm.skl.plane_y_data_rate[id] = rate;
> > -   }
> > -
> > -   /* Calculate CRTC's total data rate from cached values */
> > -   for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane)
> > {
> > -   int id = skl_wm_plane_id(intel_plane);
> > +   plane_y_data_rate[id] = rate;
> >  
> > -   /* packed/uv */
> > -   total_data_rate += intel_cstate-
> > >wm.skl.plane_data_rate[id];
> > -   total_data_rate += intel_cstate-
> > >wm.skl.plane_y_data_rate[id];
> > +   total_data_rate += rate;
> >     }
> >  
> >     return total_data_rate;
> > @@ -3366,6 +3362,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state
> > *cstate,
> >     int num_active;
> >     int id, i;
> >  
> > +   unsigned data_rate[I915_MAX_PLANES] = {};
> > +   unsigned y_data_rate[I915_MAX_PLANES] = {};
> > +
> 
> Minor nitpick; if you picked different names here (e.g.,
> plane_data_rate[]) then you could leave the local variables farther
> down
> named 'data_rate' and 'y_data_rate' which would reduce the diff
> changes
> and result in a slightly smaller patch.
> 
> Whether or not you feel like making that change, killing the caching
> is
> good so,
> 
> Reviewed-by: Matt Roper 
> 
> 
> > 
> >     /* Clear the partitioning for disabled planes. */
> >     memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
> >     memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));
> > @@ -3425,29 +3424,28 @@ skl_allocate_pipe_ddb(struct
> > intel_crtc_state *cstate,
> >      *
> >      * FIXME: we may not allocate every single block here.
> >      */
> > -   total_data_rate =
> > skl_get_total_relative_data_rate(cstate);
> > +   total_data_rate = skl_get_total_relative_data_rate(cstate,
> > data_rate, y_data_rate);
> >     if (total_data_rate == 0)
> >     return 0;
> >  
> >     start = alloc->start;
> > -   for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane)
> > {
> > -   unsigned int data_rate, y_data_rate;
> > +   

[Intel-gfx] [PATCH 2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

2016-10-20 Thread Imre Deak
On my APL the LSPCON firmware resumes in PCON mode as opposed to the
expected LS mode. It also appears to be in a state where AUX DPCD reads
will succeed but return garbage, recovering only after a few hundred
milliseconds. After the recovery time DPCD reads will result in the
correct values and things will continue to work. If I2C over AUX is
attempted during this recovery time (implying an AUX write transaction)
the firmware won't recover and will stay in this broken state.

As a workaround check if the firmware is in PCON state after resume and
if so wait until the correct DPCD values are returned. For this we
compare the branch descriptor with the one we cached during init time.
If the firmware was in the LS state, we skip the w/a and continue as
before.

Cc: Shashank Sharma 
Cc: Ville Syrjälä 
Cc: Jani Nikula 
Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/i915/intel_dp.c |  2 +-
 drivers/gpu/drm/i915/intel_drv.h|  6 -
 drivers/gpu/drm/i915/intel_lspcon.c | 52 ++---
 3 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index e90211e..ec031db 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
intel_dp->DP = DP;
 }
 
-static bool
+bool
 intel_dp_read_dpcd(struct intel_dp *intel_dp)
 {
if (drm_dp_dpcd_read(_dp->aux, 0x000, intel_dp->dpcd,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a35e241..9a2366e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -972,7 +972,9 @@ struct intel_dp {
 struct intel_lspcon {
bool active;
enum drm_lspcon_mode mode;
-   struct drm_dp_aux *aux;
+   struct intel_dp *intel_dp;
+   bool desc_valid;
+   struct intel_dp_desc desc;
 };
 
 struct intel_digital_port {
@@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int 
lane_count)
 }
 
 bool
+intel_dp_read_dpcd(struct intel_dp *intel_dp);
+bool
 intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
 void
 intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c 
b/drivers/gpu/drm/i915/intel_lspcon.c
index d2c8cb2..54c6173 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -30,7 +30,7 @@
 static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon 
*lspcon)
 {
enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
-   struct i2c_adapter *adapter = >aux->ddc;
+   struct i2c_adapter *adapter = >intel_dp->aux.ddc;
 
if (drm_lspcon_get_mode(adapter, _mode))
DRM_ERROR("Error reading LSPCON mode\n");
@@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
 {
int err;
enum drm_lspcon_mode current_mode;
-   struct i2c_adapter *adapter = >aux->ddc;
+   struct i2c_adapter *adapter = >intel_dp->aux.ddc;
 
err = drm_lspcon_get_mode(adapter, _mode);
if (err) {
@@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
 static bool lspcon_probe(struct intel_lspcon *lspcon)
 {
enum drm_dp_dual_mode_type adaptor_type;
-   struct i2c_adapter *adapter = >aux->ddc;
+   struct i2c_adapter *adapter = >intel_dp->aux.ddc;
 
/* Lets probe the adaptor and check its type */
adaptor_type = drm_dp_dual_mode_detect(adapter);
@@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
return true;
 }
 
+static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
+{
+   unsigned long start = jiffies;
+
+   if (!lspcon->desc_valid)
+   return;
+
+   while (1) {
+   struct intel_dp_desc desc;
+
+   /*
+* The w/a only applies in PCON mode and we don't expect any
+* AUX errors.
+*/
+   if (!intel_dp_read_desc(lspcon->intel_dp, ))
+   return;
+
+   if (!memcmp(>desc, , sizeof(desc))) {
+   DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u 
ms\n",
+ jiffies_to_msecs(jiffies - start));
+   return;
+   }
+
+   if (time_after(jiffies, start + msecs_to_jiffies(1000)))
+   break;
+
+   msleep(10);
+   }
+
+   DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
+}
+
 void lspcon_resume(struct intel_lspcon *lspcon)
 {
+   lspcon_resume_in_pcon_wa(lspcon);
+
if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true))
DRM_ERROR("LSPCON resume failed\n");
else

[Intel-gfx] [PATCH 1/2] drm/i915/dp: Print full branch/sink descriptor for all outputs

2016-10-20 Thread Imre Deak
Extend the branch/sink descriptor info with the missing device ID
field and print this info for eDP and LSPCON connectors too.

Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/i915/intel_dp.c | 83 +++--
 drivers/gpu/drm/i915/intel_drv.h| 13 ++
 drivers/gpu/drm/i915/intel_lspcon.c |  7 
 3 files changed, 53 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 88f3b74..e90211e 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1442,42 +1442,34 @@ static void intel_dp_print_rates(struct intel_dp 
*intel_dp)
DRM_DEBUG_KMS("common rates: %s\n", str);
 }
 
-static void intel_dp_print_hw_revision(struct intel_dp *intel_dp)
+static bool intel_dp_is_branch(struct intel_dp *intel_dp)
 {
-   uint8_t rev;
-   int len;
-
-   if ((drm_debug & DRM_UT_KMS) == 0)
-   return;
-
-   if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
- DP_DWN_STRM_PORT_PRESENT))
-   return;
-
-   len = drm_dp_dpcd_read(_dp->aux, DP_BRANCH_HW_REV, , 1);
-   if (len < 0)
-   return;
-
-   DRM_DEBUG_KMS("sink hw revision: %d.%d\n", (rev & 0xf0) >> 4, rev & 
0xf);
+   return intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
+  DP_DWN_STRM_PORT_PRESENT;
 }
 
-static void intel_dp_print_sw_revision(struct intel_dp *intel_dp)
+bool
+intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc)
 {
-   uint8_t rev[2];
-   int len;
+   u32 base = intel_dp_is_branch(intel_dp) ? DP_BRANCH_OUI : DP_SINK_OUI;
 
-   if ((drm_debug & DRM_UT_KMS) == 0)
-   return;
+   return drm_dp_dpcd_read(_dp->aux, base, desc, sizeof(*desc)) ==
+  sizeof(*desc);
+}
 
-   if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
- DP_DWN_STRM_PORT_PRESENT))
-   return;
+void
+intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc)
+{
+   const char *dev_type = intel_dp_is_branch(intel_dp) ? "branch" : "sink";
+   bool oui_sup = intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] &
+  DP_OUI_SUPPORT;
 
-   len = drm_dp_dpcd_read(_dp->aux, DP_BRANCH_SW_REV, , 2);
-   if (len < 0)
-   return;
-
-   DRM_DEBUG_KMS("sink sw revision: %d.%d\n", rev[0], rev[1]);
+   DRM_DEBUG_KMS("DP %s: OUI %*phD%s dev-ID %.*s HW-rev %d.%d SW-rev 
%d.%d\n",
+ dev_type,
+ (int)sizeof(desc->oui), desc->oui, oui_sup ? "" : "(NS)",
+ (int)sizeof(desc->device_id), desc->device_id,
+ (desc->hw_rev & 0xf0) >> 4, (desc->hw_rev & 0xf),
+ desc->sw_major_rev, desc->sw_minor_rev);
 }
 
 static int rate_to_index(int find, const int *rates)
@@ -3519,6 +3511,13 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
if (!intel_dp_read_dpcd(intel_dp))
return false;
 
+   if (drm_debug & DRM_UT_KMS) {
+   struct intel_dp_desc desc;
+
+   if (intel_dp_read_desc(intel_dp, ))
+   intel_dp_print_desc(intel_dp, );
+   }
+
if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
@@ -3621,23 +3620,6 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
return true;
 }
 
-static void
-intel_dp_probe_oui(struct intel_dp *intel_dp)
-{
-   u8 buf[3];
-
-   if (!(intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] & DP_OUI_SUPPORT))
-   return;
-
-   if (drm_dp_dpcd_read(_dp->aux, DP_SINK_OUI, buf, 3) == 3)
-   DRM_DEBUG_KMS("Sink OUI: %02hx%02hx%02hx\n",
- buf[0], buf[1], buf[2]);
-
-   if (drm_dp_dpcd_read(_dp->aux, DP_BRANCH_OUI, buf, 3) == 3)
-   DRM_DEBUG_KMS("Branch OUI: %02hx%02hx%02hx\n",
- buf[0], buf[1], buf[2]);
-}
-
 static bool
 intel_dp_can_mst(struct intel_dp *intel_dp)
 {
@@ -4410,11 +4392,12 @@ intel_dp_long_pulse(struct intel_connector 
*intel_connector)
  yesno(drm_dp_tps3_supported(intel_dp->dpcd)));
 
intel_dp_print_rates(intel_dp);
+   if (drm_debug & DRM_UT_KMS) {
+   struct intel_dp_desc desc;
 
-   intel_dp_probe_oui(intel_dp);
-
-   intel_dp_print_hw_revision(intel_dp);
-   intel_dp_print_sw_revision(intel_dp);
+   if (intel_dp_read_desc(intel_dp, ))
+   intel_dp_print_desc(intel_dp, );
+   }
 
intel_dp_configure_mst(intel_dp);
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index c06a33e..a35e241 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -883,6 +883,14 @@ enum link_m_n_set {
M2_N2
 };
 
+struct intel_dp_desc {
+   u8 

Re: [Intel-gfx] [PATCH] rtc: cmos: Don't enable interrupts in the middle of the interrupt handler

2016-10-20 Thread Alexandre Belloni
On 19/10/2016 at 21:02:04 +0300, ville.syrj...@linux.intel.com wrote :
> From: Ville Syrjälä 
> 
> Using spin_lock_irq()/spin_unlock_irq() from within the interrupt
> handler is a no-no. Let's save/restore the flags to avoid turning on
> interrupts prematurely.
> 
> We hit this in a bunch of our CI systems, but for whatever reason I
> wasn't able to reproduce on my own machine, so this fix is just
> based on the backtrace.
> 
> [  202.634918] WARNING: CPU: 0 PID: 0 at kernel/locking/lockdep.c:2729 
> trace_hardirqs_on_caller+0x113/0x1b0
> [  202.634919] DEBUG_LOCKS_WARN_ON(current->hardirq_context)
> [  202.634929] Modules linked in: snd_hda_intel i915 x86_pkg_temp_thermal 
> intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel 
> lpc_ich snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi 
> snd_hda_codec snd_hwdep i2c_designware_platform i2c_designware_core 
> snd_hda_core mei_me mei snd_pcm r8169 mii sdhci_acpi sdhci mmc_core i2c_hid 
> [last unloaded: i915]
> [  202.634930] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U  
> 4.9.0-rc1-CI-CI_DRM_1734+ #1
> [  202.634931] Hardware name: GIGABYTE M4HM87P-00/M4HM87P-00, BIOS F6 
> 12/10/2014
> [  202.634933]  88011ea03d68 8142dce5 88011ea03db8 
> 
> [  202.634934]  88011ea03da8 8107e496 0aa90002 
> 81e249a0
> [  202.634935]  81815637 82e7c280  
> 0004
> [  202.634936] Call Trace:
> [  202.634939]  
> [  202.634939]  [] dump_stack+0x67/0x92
> [  202.634941]  [] __warn+0xc6/0xe0
> [  202.634944]  [] ? _raw_spin_unlock_irq+0x27/0x50
> [  202.634945]  [] warn_slowpath_fmt+0x4a/0x50
> [  202.634946]  [] trace_hardirqs_on_caller+0x113/0x1b0
> [  202.634948]  [] trace_hardirqs_on+0xd/0x10
> [  202.634949]  [] _raw_spin_unlock_irq+0x27/0x50
> [  202.634951]  [] rtc_handler+0x32/0xa0
> [  202.634954]  [] acpi_ev_fixed_event_detect+0xd4/0xfb
> [  202.634956]  [] acpi_ev_sci_xrupt_handler+0xf/0x2d
> [  202.634957]  [] acpi_irq+0x11/0x2c
> [  202.634960]  [] __handle_irq_event_percpu+0x58/0x370
> [  202.634961]  [] handle_irq_event_percpu+0x1e/0x50
> [  202.634962]  [] handle_irq_event+0x34/0x60
> [  202.634963]  [] handle_fasteoi_irq+0xa6/0x170
> [  202.634966]  [] handle_irq+0x15/0x20
> [  202.634967]  [] do_IRQ+0x68/0x130
> [  202.634968]  [] common_interrupt+0x89/0x89
> [  202.634970]  
> [  202.634970]  [] ? mwait_idle+0x93/0x210
> [  202.634971]  [] ? mwait_idle+0x8a/0x210
> [  202.634972]  [] arch_cpu_idle+0xa/0x10
> [  202.634973]  [] default_idle_call+0x1e/0x30
> [  202.634974]  [] cpu_startup_entry+0x17c/0x1f0
> [  202.634976]  [] rest_init+0x127/0x130
> [  202.634978]  [] start_kernel+0x3f6/0x403
> [  202.634980]  [] x86_64_start_reservations+0x2a/0x2c
> [  202.634981]  [] x86_64_start_kernel+0x173/0x186
> [  202.634982] ---[ end trace 293c99618fa08d34 ]---
> 
> Cc: Gabriele Mazzotta 
> Cc: Alexandre Belloni 
> Fixes: 983bf1256edb ("rtc: cmos: Clear ACPI-driven alarms upon resume")
> Signed-off-by: Ville Syrjälä 
> ---
>  drivers/rtc/rtc-cmos.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
Applied, thanks.

-- 
Alexandre Belloni, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 12/41] drm/i915: Introduce an internal allocator for disposable private objects

2016-10-20 Thread Tvrtko Ursulin


On 20/10/2016 16:03, Chris Wilson wrote:

Quite a few of our objects used for internal hardware programming do not
benefit from being swappable or from being zero initialised. As such
they do not benefit from using a shmemfs backing storage and since they
are internal and never directly exposed to the user, we do not need to
worry about providing a filp. For these we can use a
drm_i915_gem_object wrapper around a sg_table of plain struct page. They
are not swap backed and not automatically pinned. If they are reaped
by the shrinker, the pages are released and the contents discarded. For
the internal use case, this is fine as for example, ringbuffers are
pinned from being written by a request to be read by the hardware. Once
they are idle, they can be discarded entirely. As such they are a good
match for execlist ringbuffers and a small variety of other internal
objects.

In the first iteration, this is limited to the scratch batch buffers we
use (for command parsing and state initialisation).


And the status page.



v2: Allocate physically contiguous pages, where possible.
v3: Reduce maximum order on subsequent requests following an allocation
failure.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/Makefile|   1 +
 drivers/gpu/drm/i915/i915_drv.h  |   5 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  27 ++---
 drivers/gpu/drm/i915/i915_gem_internal.c | 167 +++
 drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c   |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  14 ++-
 7 files changed, 194 insertions(+), 24 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 612340097f4b..7faa04c91e1a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
  i915_gem_execbuffer.o \
  i915_gem_fence.o \
  i915_gem_gtt.o \
+ i915_gem_internal.o \
  i915_gem.o \
  i915_gem_render_state.o \
  i915_gem_request.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e93c3797d90..e267e20bdcdb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3543,6 +3543,11 @@ i915_gem_object_create_stolen_for_preallocated(struct 
drm_device *dev,
   u32 gtt_offset,
   u32 size);

+/* i915_gem_internal.c */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
+   unsigned int size);
+
 /* i915_gem_shrinker.c */
 unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
  unsigned long target,
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index cb25cad3318c..aa4e1e043b4e 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
size_t size)
 {
struct drm_i915_gem_object *obj = NULL;
-   struct drm_i915_gem_object *tmp, *next;
+   struct drm_i915_gem_object *tmp;
struct list_head *list;
-   int n;
+   int n, ret;

lockdep_assert_held(>engine->i915->drm.struct_mutex);

@@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
n = ARRAY_SIZE(pool->cache_list) - 1;
list = >cache_list[n];

-   list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
+   list_for_each_entry(tmp, list, batch_pool_link) {
/* The batches are strictly LRU ordered */
if (!i915_gem_active_is_idle(>last_read[pool->engine->id],
 >base.dev->struct_mutex))
break;

-   /* While we're looping, do some clean up */
-   if (tmp->madv == __I915_MADV_PURGED) {
-   list_del(>batch_pool_link);
-   i915_gem_object_put(tmp);
-   continue;
-   }
-
if (tmp->base.size >= size) {
obj = tmp;
break;
@@ -132,19 +125,15 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
}

if (obj == NULL) {
-   int ret;
-
-   obj = i915_gem_object_create(>engine->i915->drm, size);
+   obj = i915_gem_object_create_internal(pool->engine->i915, size);
if (IS_ERR(obj))
return obj;
-
-   ret = i915_gem_object_get_pages(obj);
-   if (ret)
-   

[Intel-gfx] ✗ Fi.CI.BAT: warning for drm/i915/gen9: Remove WaEnableYV12BugFixInHalfSliceChicken7 (rev2)

2016-10-20 Thread Patchwork
== Series Details ==

Series: drm/i915/gen9: Remove WaEnableYV12BugFixInHalfSliceChicken7 (rev2)
URL   : https://patchwork.freedesktop.org/series/13949/
State : warning

== Summary ==

Series 13949v2 drm/i915/gen9: Remove WaEnableYV12BugFixInHalfSliceChicken7
https://patchwork.freedesktop.org/api/1.0/series/13949/revisions/2/mbox/

Test drv_module_reload_basic:
pass   -> DMESG-WARN (fi-ilk-650)
skip   -> PASS   (fi-skl-6770hq)
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-c:
pass   -> DMESG-WARN (fi-ivb-3770)

fi-bdw-5557u total:246  pass:231  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:246  pass:204  dwarn:0   dfail:0   fail:0   skip:42 
fi-bxt-t5700 total:246  pass:216  dwarn:0   dfail:0   fail:0   skip:30 
fi-byt-j1900 total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-byt-n2820 total:246  pass:211  dwarn:0   dfail:0   fail:0   skip:35 
fi-hsw-4770  total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-hsw-4770r total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-ilk-650   total:246  pass:184  dwarn:1   dfail:0   fail:1   skip:60 
fi-ivb-3520m total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:25 
fi-ivb-3770  total:246  pass:220  dwarn:1   dfail:0   fail:0   skip:25 
fi-kbl-7200u total:246  pass:222  dwarn:0   dfail:0   fail:0   skip:24 
fi-skl-6260u total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq total:246  pass:222  dwarn:1   dfail:0   fail:0   skip:23 
fi-skl-6700k total:246  pass:221  dwarn:1   dfail:0   fail:0   skip:24 
fi-skl-6770hq total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:246  pass:210  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600  total:246  pass:209  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2777/

5113d7495dab3ea4d14a7698368c6be80f6c045c drm-intel-nightly: 
2016y-10m-20d-13h-31m-16s UTC integration manifest
9eee225 drm/i915/gen9: Remove WaEnableYV12BugFixInHalfSliceChicken7

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 15/41] drm/i915: Use a radixtree for random access to the object's backing storage

2016-10-20 Thread Tvrtko Ursulin


On 20/10/2016 16:03, Chris Wilson wrote:

A while ago we switched from a contiguous array of pages into an sglist,
for that was both more convenient for mapping to hardware and avoided
the requirement for a vmalloc array of pages on every object. However,
certain GEM API calls (like pwrite, pread as well as performing
relocations) do desire access to individual struct pages. A quick hack
was to introduce a cache of the last access such that finding the
following page was quick - this works so long as the caller desired
sequential access. Walking backwards, or multiple callers, still hits a
slow linear search for each page. One solution is to store each
successful lookup in a radix tree.

v2: Rewrite building the radixtree for clarity, hopefully.

v3: Rearrange execbuf to avoid calling i915_gem_object_get_sg() from
within an atomic section and so relax the allocation context to a simple
GFP_KERNEL and mutex.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  69 +---
 drivers/gpu/drm/i915/i915_gem.c | 185 +---
 drivers/gpu/drm/i915/i915_gem_stolen.c  |   4 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |   4 +-
 4 files changed, 199 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0897f43e7796..e2e48af8d41f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2273,9 +2273,12 @@ struct drm_i915_gem_object {

struct sg_table *pages;
int pages_pin_count;
-   struct get_page {
-   struct scatterlist *sg;
-   int last;
+   struct i915_gem_object_page_iter {
+   struct scatterlist *sg_pos;
+   unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+   struct radix_tree_root radix;
+   struct mutex lock; /* protects this cache */
} get_page;
void *mapping;

@@ -2478,6 +2481,14 @@ static __always_inline struct sgt_iter {
return s;
 }

+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+   ++sg;
+   if (unlikely(sg_is_chain(sg)))
+   sg = sg_chain_ptr(sg);
+   return sg;
+}
+
 /**
  * __sg_next - return the next scatterlist entry in a list
  * @sg:The current sg entry
@@ -2492,9 +2503,7 @@ static inline struct scatterlist *__sg_next(struct 
scatterlist *sg)
 #ifdef CONFIG_DEBUG_SG
BUG_ON(sg->sg_magic != SG_MAGIC);
 #endif
-   return sg_is_last(sg) ? NULL :
-   likely(!sg_is_chain(++sg)) ? sg :
-   sg_chain_ptr(sg);
+   return sg_is_last(sg) ? NULL : sg_next(sg);
 }

 /**
@@ -3172,45 +3181,21 @@ static inline int __sg_page_count(struct scatterlist 
*sg)
return sg->length >> PAGE_SHIFT;
 }

-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
-
-static inline dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
-{
-   if (n < obj->get_page.last) {
-   obj->get_page.sg = obj->pages->sgl;
-   obj->get_page.last = 0;
-   }
-
-   while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
-   obj->get_page.last += __sg_page_count(obj->get_page.sg++);
-   if (unlikely(sg_is_chain(obj->get_page.sg)))
-   obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
-   }
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+  unsigned int n, unsigned int *offset);

-   return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << 
PAGE_SHIFT);
-}
-
-static inline struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
-{
-   if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
-   return NULL;
-
-   if (n < obj->get_page.last) {
-   obj->get_page.sg = obj->pages->sgl;
-   obj->get_page.last = 0;
-   }
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+unsigned int n);

-   while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
-   obj->get_page.last += __sg_page_count(obj->get_page.sg++);
-   if (unlikely(sg_is_chain(obj->get_page.sg)))
-   obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
-   }
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+  unsigned int n);

-   return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
-}
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+   unsigned long n);

 static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d596b1f9e969..a9ea20d53e23 

Re: [Intel-gfx] [PATCH 08/41] drm/i915: Remove superfluous wait_for_error() from throttle-ioctl

2016-10-20 Thread Joonas Lahtinen
On to, 2016-10-20 at 16:03 +0100, Chris Wilson wrote:

> Reviewed-by: Joonas Lahtinen " at the end of line.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 39/41] drm/i915: Enable multiple timelines

2016-10-20 Thread Chris Wilson
On Thu, Oct 20, 2016 at 06:26:04PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-10-14 at 13:18 +0100, Chris Wilson wrote:
> > With the infrastructure converted over to tracking multiple timelines in
> > the GEM API whilst preserving the efficiency of using a single execution
> > timeline internally, we can now assign a separate timeline to every
> > context with full-ppgtt.
> > 
> > Signed-off-by: Chris Wilson 
> 
> Changelog would be nice, but seems to address the major issues.

v2: Add a comment to indicate the xfer between timelines upon
submission.

? Are we on the right thread?

Most of the earliest issues were addressed by doing them before we got to
the enabling patch.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH RESEND v2] drm/i915/gen9: Remove WaEnableYV12BugFixInHalfSliceChicken7

2016-10-20 Thread Arkadiusz Hiler
Dropping WA because it was for early steppings.

It is fixed in newer preproduction and all production revisions.

v2: add references, updated commit message

References: HSD#2126385, HSD#2131381, BSID#0764
Cc: Mika Kuoppala 
Cc: Chris Wilson 
Cc: Michal Winiarski 
Signed-off-by: Arkadiusz Hiler 
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e107455..32786ba 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -849,10 +849,8 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*engine)
 */
}
 
-   /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl */
/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
- GEN9_ENABLE_YV12_BUGFIX |
  GEN9_ENABLE_GPGPU_PREEMPTION);
 
/* Wa4x4STCOptimizationDisable:skl,bxt,kbl */
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 39/41] drm/i915: Enable multiple timelines

2016-10-20 Thread Joonas Lahtinen
On pe, 2016-10-14 at 13:18 +0100, Chris Wilson wrote:
> With the infrastructure converted over to tracking multiple timelines in
> the GEM API whilst preserving the efficiency of using a single execution
> timeline internally, we can now assign a separate timeline to every
> context with full-ppgtt.
> 
> Signed-off-by: Chris Wilson 

Changelog would be nice, but seems to address the major issues.

Reviewed-by: Joonas Lahtinen 

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 16/18] drm/i915: Enable multiple timelines

2016-10-20 Thread Joonas Lahtinen
On to, 2016-10-20 at 13:49 +0100, Chris Wilson wrote:
> On Mon, Sep 19, 2016 at 06:52:13PM +0300, Joonas Lahtinen wrote:
> > 
> > On ke, 2016-09-14 at 07:52 +0100, Chris Wilson wrote:
> > > 
> > > @@ -315,17 +304,42 @@ submit_notify(struct i915_sw_fence *fence, enum 
> > > i915_sw_fence_notify state)
> > >  {
> > >   struct drm_i915_gem_request *request =
> > >   container_of(fence, typeof(*request), submit);
> > > + struct intel_timeline *timeline;
> > > + struct intel_engine_cs *engine = request->engine;
> > > + unsigned long flags;
> > > + u32 seqno;
> > >  
> > >   /* Will be called from irq-context when using foreign DMA fences */
> > >  
> > > - switch (state) {
> > > - case FENCE_COMPLETE:
> > > - request->engine->submit_request(request);
> > > - break;
> > > + if (state != FENCE_COMPLETE)
> > > + return NOTIFY_DONE;
> > >  
> > > - case FENCE_FREE:
> > > - break;
> > > - }
> > > + timeline = engine->timeline;
> > > + GEM_BUG_ON(timeline == request->timeline);
> > 
> > Umm, why this BUG_ON?
> 
> To document that the intent here is to move from the per-context
> timeline onto the global per-engine timeline. If the request was already
> on the engine->timeline bad things would happen, at the very simplest a
> deadlock here.

I see. I would have lifted the assignment and the BUG_ON before actual
code, like elsewhere. But I can live with this too.

The new patch seems to have got rid of the emit_request weirdness too,
so I'll add my R-b there.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 22/41] drm/i915: Implement pwrite without struct-mutex

2016-10-20 Thread Chris Wilson
We only need struct_mutex within pwrite for a brief window where we need
to serialise with rendering and control our cache domains. Elsewhere we
can rely on the backing storage being pinned, and forgive userspace any
races against us.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem.c | 353 ++--
 1 file changed, 122 insertions(+), 231 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ab119ea49634..ba311a104564 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1115,72 +1115,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
  * page faults in the source data
  */
 
-static inline int
-fast_user_write(struct io_mapping *mapping,
-   loff_t page_base, int page_offset,
-   char __user *user_data,
-   int length)
+static inline bool
+ggtt_write(struct io_mapping *mapping,
+  loff_t base, int offset,
+  char __user *user_data, int length)
 {
-   void __iomem *vaddr_atomic;
void *vaddr;
unsigned long unwritten;
 
-   vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
/* We can use the cpu mem copy function because this is X86. */
-   vaddr = (void __force*)vaddr_atomic + page_offset;
-   unwritten = __copy_from_user_inatomic_nocache(vaddr,
+   vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
+   unwritten = __copy_from_user_inatomic_nocache(vaddr + offset,
  user_data, length);
-   io_mapping_unmap_atomic(vaddr_atomic);
-   return unwritten;
-}
-
-static inline unsigned long
-slow_user_access(struct io_mapping *mapping,
-unsigned long page_base, int page_offset,
-char __user *user_data,
-unsigned long length, bool pwrite)
-{
-   void __iomem *ioaddr;
-   void *vaddr;
-   unsigned long unwritten;
-
-   ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
-   /* We can use the cpu mem copy function because this is X86. */
-   vaddr = (void __force *)ioaddr + page_offset;
-   if (pwrite)
-   unwritten = __copy_from_user(vaddr, user_data, length);
-   else
-   unwritten = __copy_to_user(user_data, vaddr, length);
+   io_mapping_unmap_atomic(vaddr);
+   if (unwritten) {
+   vaddr = (void __force *)
+   io_mapping_map_wc(mapping, base, PAGE_SIZE);
+   unwritten = copy_from_user(vaddr + offset, user_data, length);
+   io_mapping_unmap(vaddr);
+   }
 
-   io_mapping_unmap(ioaddr);
return unwritten;
 }
 
 /**
  * This is the fast pwrite path, where we copy the data directly from the
  * user into the GTT, uncached.
- * @i915: i915 device private data
- * @obj: i915 gem object
+ * @obj: i915 GEM object
  * @args: pwrite arguments structure
- * @file: drm file pointer
  */
 static int
-i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
-struct drm_i915_gem_object *obj,
-struct drm_i915_gem_pwrite *args,
-struct drm_file *file)
+i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
+const struct drm_i915_gem_pwrite *args)
 {
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = >ggtt;
-   struct drm_device *dev = obj->base.dev;
-   struct i915_vma *vma;
struct drm_mm_node node;
-   uint64_t remain, offset;
-   char __user *user_data;
+   struct i915_vma *vma;
+   u64 remain, offset;
+   void __user *user_data;
int ret;
-   bool hit_slow_path = false;
 
-   if (i915_gem_object_is_tiled(obj))
-   return -EFAULT;
+   ret = mutex_lock_interruptible(>drm.struct_mutex);
+   if (ret)
+   return ret;
 
intel_runtime_pm_get(i915);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
@@ -1197,21 +1175,17 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
if (IS_ERR(vma)) {
ret = insert_mappable_node(ggtt, , PAGE_SIZE);
if (ret)
-   goto out;
-
-   ret = i915_gem_object_pin_pages(obj);
-   if (ret) {
-   remove_mappable_node();
-   goto out;
-   }
+   goto out_unlock;
+   GEM_BUG_ON(!node.allocated);
}
 
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
goto out_unpin;
 
+   mutex_unlock(>drm.struct_mutex);
+
intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-   obj->mm.dirty = true;
 
user_data = u64_to_user_ptr(args->data_ptr);
offset = 

[Intel-gfx] [PATCH 13/41] drm/i915: Reuse the active golden render state batch

2016-10-20 Thread Chris Wilson
The golden render state is constant, but we recreate the batch setting
it up for every new context. If we keep that batch in a volatile cache
we can safely reuse it whenever we need to initialise a new context. We
mark the pages as purgeable and use the shrinker to recover pages from
the batch whenever we face memory pressure, recreating that batch afresh
on the next new context.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 184 +--
 drivers/gpu/drm/i915/i915_gem_render_state.h |   4 +-
 drivers/gpu/drm/i915/intel_engine_cs.c   |   5 +
 drivers/gpu/drm/i915/intel_lrc.c |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   3 +
 6 files changed, 129 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 217e0b58b930..9625e1a662ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,17 +28,19 @@
 #include "i915_drv.h"
 #include "intel_renderstate.h"
 
-struct render_state {
+struct intel_render_state {
const struct intel_renderstate_rodata *rodata;
struct i915_vma *vma;
-   u32 aux_batch_size;
-   u32 aux_batch_offset;
+   u32 batch_offset;
+   u32 batch_size;
+   u32 aux_offset;
+   u32 aux_size;
 };
 
 static const struct intel_renderstate_rodata *
-render_state_get_rodata(const struct drm_i915_gem_request *req)
+render_state_get_rodata(const struct intel_engine_cs *engine)
 {
-   switch (INTEL_GEN(req->i915)) {
+   switch (INTEL_GEN(engine->i915)) {
case 6:
return _null_state;
case 7:
@@ -63,29 +65,27 @@ render_state_get_rodata(const struct drm_i915_gem_request 
*req)
  */
 #define OUT_BATCH(batch, i, val)   \
do {\
-   if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {  \
-   ret = -ENOSPC;  \
-   goto err_out;   \
-   }   \
+   if ((i) >= PAGE_SIZE / sizeof(u32)) \
+   goto err;   \
(batch)[(i)++] = (val); \
} while(0)
 
-static int render_state_setup(struct render_state *so)
+static int render_state_setup(struct intel_render_state *so,
+ struct drm_i915_private *i915)
 {
-   struct drm_i915_private *dev_priv = to_i915(so->vma->vm->dev);
const struct intel_renderstate_rodata *rodata = so->rodata;
-   const bool has_64bit_reloc = INTEL_GEN(dev_priv) >= 8;
+   const bool has_64bit_reloc = INTEL_GEN(i915) >= 8;
+   struct drm_i915_gem_object *obj = so->vma->obj;
unsigned int i = 0, reloc_index = 0;
-   struct page *page;
+   unsigned int needs_clflush;
u32 *d;
int ret;
 
-   ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true);
+   ret = i915_gem_obj_prepare_shmem_write(obj, _clflush);
if (ret)
return ret;
 
-   page = i915_gem_object_get_dirty_page(so->vma->obj, 0);
-   d = kmap(page);
+   d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
 
while (i < rodata->batch_items) {
u32 s = rodata->batch[i];
@@ -95,10 +95,8 @@ static int render_state_setup(struct render_state *so)
s = lower_32_bits(r);
if (has_64bit_reloc) {
if (i + 1 >= rodata->batch_items ||
-   rodata->batch[i + 1] != 0) {
-   ret = -EINVAL;
-   goto err_out;
-   }
+   rodata->batch[i + 1] != 0)
+   goto err;
 
d[i++] = s;
s = upper_32_bits(r);
@@ -110,12 +108,20 @@ static int render_state_setup(struct render_state *so)
d[i++] = s;
}
 
+   if (rodata->reloc[reloc_index] != -1) {
+   DRM_ERROR("only %d relocs resolved\n", reloc_index);
+   goto err;
+   }
+
+   so->batch_offset = so->vma->node.start;
+   so->batch_size = rodata->batch_items * sizeof(u32);
+
while (i % CACHELINE_DWORDS)
OUT_BATCH(d, i, MI_NOOP);
 
-   so->aux_batch_offset = i * sizeof(u32);
+   so->aux_offset = i * sizeof(u32);
 
-   if (HAS_POOLED_EU(dev_priv)) {
+   if (HAS_POOLED_EU(i915)) {
/*
 * We always program 3x6 pool config but 

[Intel-gfx] [PATCH 23/41] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain

2016-10-20 Thread Chris Wilson
As we can locklessly (well struct_mutex-lessly) acquire the backing
storage, do so in set-domain-ioctl to reduce the contention on the
struct_mutex.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem.c | 99 +
 1 file changed, 61 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ba311a104564..697a83823920 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1451,6 +1451,30 @@ write_origin(struct drm_i915_gem_object *obj, unsigned 
domain)
obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+   struct drm_i915_private *i915;
+   struct list_head *list;
+   struct i915_vma *vma;
+
+   list_for_each_entry(vma, >vma_list, obj_link) {
+   if (!i915_vma_is_ggtt(vma))
+   continue;
+
+   if (i915_vma_is_active(vma))
+   continue;
+
+   if (!drm_mm_node_allocated(>node))
+   continue;
+
+   list_move_tail(>vm_link, >vm->inactive_list);
+   }
+
+   i915 = to_i915(obj->base.dev);
+   list = obj->bind_count ? >mm.bound_list : >mm.unbound_list;
+   list_move_tail(>global_list, list);
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1466,7 +1490,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
struct drm_i915_gem_object *obj;
uint32_t read_domains = args->read_domains;
uint32_t write_domain = args->write_domain;
-   int ret;
+   int err;
 
/* Only handle setting domains to types used by the CPU. */
if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
@@ -1486,33 +1510,48 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
 * We will repeat the flush holding the lock in the normal manner
 * to catch cases where we are gazumped.
 */
-   ret = i915_gem_object_wait(obj,
+   err = i915_gem_object_wait(obj,
   I915_WAIT_INTERRUPTIBLE |
   (write_domain ? I915_WAIT_ALL : 0),
   MAX_SCHEDULE_TIMEOUT,
   to_rps_client(file));
-   if (ret)
-   goto err;
+   if (err)
+   goto out_unlocked;
 
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   goto err;
+   /* Flush and acquire obj->pages so that we are coherent through
+* direct access in memory with previous cached writes through
+* shmemfs and that our cache domain tracking remains valid.
+* For example, if the obj->filp was moved to swap without us
+* being notified and releasing the pages, we would mistakenly
+* continue to assume that the obj remained out of the CPU cached
+* domain.
+*/
+   err = i915_gem_object_pin_pages(obj);
+   if (err)
+   goto out_unlocked;
+
+   err = i915_mutex_lock_interruptible(dev);
+   if (err)
+   goto out_pages;
 
if (read_domains & I915_GEM_DOMAIN_GTT)
-   ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+   err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
else
-   ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+   err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
 
-   if (write_domain != 0)
-   intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
+   /* And bump the LRU for this access */
+   i915_gem_object_bump_inactive_ggtt(obj);
 
-   i915_gem_object_put(obj);
mutex_unlock(>struct_mutex);
-   return ret;
 
-err:
+   if (write_domain != 0)
+   intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
+
+out_pages:
+   i915_gem_object_unpin_pages(obj);
+out_unlocked:
i915_gem_object_put_unlocked(obj);
-   return ret;
+   return err;
 }
 
 /**
@@ -1733,6 +1772,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct 
vm_fault *vmf)
if (ret)
goto err;
 
+   ret = i915_gem_object_pin_pages(obj);
+   if (ret)
+   goto err;
+
intel_runtime_pm_get(dev_priv);
 
ret = i915_mutex_lock_interruptible(dev);
@@ -1815,6 +1858,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct 
vm_fault *vmf)
mutex_unlock(>struct_mutex);
 err_rpm:
intel_runtime_pm_put(dev_priv);
+   i915_gem_object_unpin_pages(obj);
 err:
switch (ret) {
case -EIO:
@@ -3272,24 +3316,6 @@ 

Re: [Intel-gfx] [PATCH] drm/i915: Add i915 perf infrastructure

2016-10-20 Thread Joonas Lahtinen
On ke, 2016-10-19 at 17:35 +0100, Robert Bragg wrote:
> I'll add a default: with MISSING_CASE as that looks like an i915-
> specific convention; though it seems like a real shame to defer
> missing case issues to runtime errors instead of taking advantage of
> the compiler complaining at build time that a case has been
> forgotten.

I think the key point here is not "having MISSING_CASE", but "not
having BUG".

There has been talk about using compile time checking more effectively,
so adding default is not needed. You can keep similar code construct
but reduce into WARN_ONCE or so.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/gvt: Implement WaForceWakeRenderDuringMmioTLBInvalidate

2016-10-20 Thread Arkadiusz Hiler
On Thu, Oct 20, 2016 at 05:29:36PM +0300, Mika Kuoppala wrote:
> Arkadiusz Hiler  writes:
> 
> > When invalidating RCS TLB the device can enter RC6 state interrupting
> > the process, therefore the need for render forcewake for the whole
> > procedure.
> >
> > This WA is needed for all production SKL SKUs.
> >
> > References: HSD#2136899, HSD#1404391274
> > Cc: Mika Kuoppala 
> > Cc: Zhenyu Wang 
> > Signed-off-by: Arkadiusz Hiler 
> > ---
> >  drivers/gpu/drm/i915/gvt/render.c | 11 +++
> >  1 file changed, 11 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gvt/render.c 
> > b/drivers/gpu/drm/i915/gvt/render.c
> > index f54ab85..f5000ea 100644
> > --- a/drivers/gpu/drm/i915/gvt/render.c
> > +++ b/drivers/gpu/drm/i915/gvt/render.c
> > @@ -134,11 +134,22 @@ static void handle_tlb_pending_event(struct 
> > intel_vgpu *vgpu, int ring_id)
> >  
> > reg = _MMIO(regs[ring_id]);
> >
> 
> Ok not so familiar with the gvt side but I assume this is the host
> side code and thus the vgpu is not active at this stage.

That's my understanding as well. It's code that is setting up gvt for
further use (shadow context to be exact). It's called indirectly from
intel_gvt_create_vgpu.

We should wait for Zhenyu to verify that.

> Then you could avoid some of the implicit fw dancing
> by:
> 
> diff --git a/drivers/gpu/drm/i915/gvt/render.c 
> b/drivers/gpu/drm/i915/gvt/render.c
> index feebb65..93ba156 100644
> --- a/drivers/gpu/drm/i915/gvt/render.c
> +++ b/drivers/gpu/drm/i915/gvt/render.c
> @@ -118,6 +118,7 @@ static u32 gen9_render_mocs_L3[32];
>  static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
>  {
> struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
> +   enum forcewake_domains fw;
> i915_reg_t reg;
> u32 regs[] = {
> [RCS] = 0x4260,
> @@ -135,11 +136,21 @@ static void handle_tlb_pending_event(struct intel_vgpu 
> *vgpu, int ring_id)
>  
> reg = _MMIO(regs[ring_id]);
>  
> -   I915_WRITE(reg, 0x1);
> +   fw = intel_uncore_forcewake_for_reg(dev_priv, reg,
> +   FW_REG_READ | FW_REG_WRITE);
>  
> -   if (wait_for_atomic((I915_READ(reg) == 0), 50))
> +   if (ring_id == RCS && IS_SKYLAKE(dev_priv))
> +   fw |= FORCEWAKE_RENDER;
> +
> +   intel_uncore_forcewake_get(dev_priv, fw);
> +
> +   I915_WRITE_FW(reg, 0x1);
> +
> +   if (wait_for_atomic((I915_READ_FW(reg) == 0), 50))
> gvt_err("timeout in invalidate ring (%d) tlb\n", ring_id);
>  
> +   intel_uncore_forcewake_put(dev_priv, fw);
> +
> 

I can go with it, although I do not have a strong preference. I think my
version is a little bit easier to follow, but his is less error prone,
as you check for the WA SKU only once, during setting the FW.

Any recommendations?

-- 
Cheers,
Arek
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] shmem: Support for registration of Driver/file owner specific ops

2016-10-20 Thread Joonas Lahtinen
On ke, 2016-10-19 at 20:41 +0530, akash goel wrote:
> On Thu, Mar 24, 2016 at 5:41 PM, Joonas Lahtinen
> >  wrote:
> > On ke, 2016-03-23 at 11:39 +0530, akash.g...@intel.com wrote:
> > > @@ -34,11 +34,28 @@ struct shmem_sb_info {
> > >   struct mempolicy *mpol; /* default memory policy for mappings */
> > >  };
> > > 
> > > +struct shmem_dev_info {
> > > + void *dev_private_data;
> > > + int (*dev_migratepage)(struct address_space *mapping,
> > > +struct page *newpage, struct page *page,
> > > +enum migrate_mode mode, void *dev_priv_data);
> > 
> > One might want to have a separate shmem_dev_operations struct or
> > similar.
> > 
> Sorry for the very late turnaround.
> 
> Sorry couldn't get your point here. Are you suggesting to rename the
> structure to shmem_dev_operations ?

I'm pretty sure I was after putting migratepage function pointer in
shmem_dev_operations struct, but I think that can be done once there
are more functions.

s/dev_private_data/private_data/ and s/dev_priv_data/private_data/
might be in order, too. It should be obvious from context.

> > > +};
> > > +
> > >  static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
> > >  {
> > >   return container_of(inode, struct shmem_inode_info, vfs_inode);
> > >  }
> > > 
> > > +static inline int shmem_set_device_ops(struct address_space *mapping,
> > > + struct shmem_dev_info *info)
> > > +{

This name could be shmem_set_dev_info, if there will be separate _ops
struct in future.

> > > + if (mapping->private_data != NULL)
> > > + return -EEXIST;
> > > +
> > 
> > I did a quick random peek and most set functions are just void and
> > override existing data. I'd suggest the same.
> > 
> > > 
> > > + mapping->private_data = info;
> > 
> Fine will change the return type to void and remove the check.
> 
> > 
> > Also, doesn't this kinda steal the mapping->private_data, might that be
> > unexpected for the user? I notice currently it's not being touched at
> > all.
> > 
> Sorry by User do you mean the shmem client who called shmem_file_setup() ?
> It seems clients are not expected to touch mapping->private_data and
> so shmemfs can safely use it.

If it's not used by others, should be fine. Not sure if WARN would be
in place, Chris?

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 38/41] drm/i915: Defer setting of global seqno on request to submission

2016-10-20 Thread Chris Wilson
Defer the assignment of the global seqno on a request to its submission.
In the next patch, we will only allocate the global seqno at that time,
here we are just enabling the wait-for-submission before wait-for-seqno
paths.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem_request.c  | 30 +++---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 12 
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 0e4b03c23b49..e6d6da8370fa 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -324,14 +324,32 @@ submit_notify(struct i915_sw_fence *fence, enum 
i915_sw_fence_notify state)
struct drm_i915_gem_request *request =
container_of(fence, typeof(*request), submit);
struct intel_engine_cs *engine = request->engine;
+   struct intel_timeline *timeline;
+   u32 seqno;
 
if (state != FENCE_COMPLETE)
return NOTIFY_DONE;
 
/* Will be called from irq-context when using foreign DMA fences */
 
-   engine->timeline->last_submitted_seqno = request->fence.seqno;
+   timeline = request->timeline;
 
+   seqno = request->fence.seqno;
+   GEM_BUG_ON(!seqno);
+   GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
+
+   GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
+   request->previous_seqno = timeline->last_submitted_seqno;
+   timeline->last_submitted_seqno = seqno;
+
+   /* We may be recursing from the signal callback of another i915 fence */
+   spin_lock_nested(>lock, SINGLE_DEPTH_NESTING);
+   request->global_seqno = seqno;
+   if (test_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, >fence.flags))
+   intel_engine_enable_signaling(request);
+   spin_unlock(>lock);
+
+   GEM_BUG_ON(!request->global_seqno);
engine->emit_breadcrumb(request,
request->ring->vaddr + request->postfix);
engine->submit_request(request);
@@ -427,10 +445,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
INIT_LIST_HEAD(>active_list);
req->i915 = dev_priv;
req->engine = engine;
-   req->global_seqno = req->fence.seqno;
req->ctx = i915_gem_context_get(ctx);
 
/* No zalloc, must clear what we need by hand */
+   req->global_seqno = 0;
req->previous_context = NULL;
req->file_priv = NULL;
req->batch = NULL;
@@ -704,15 +722,13 @@ void __i915_add_request(struct drm_i915_gem_request 
*request, bool flush_caches)
i915_sw_fence_await_sw_fence(>submit, >submit,
 >submitq);
 
-   GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-request->fence.seqno));
+   list_add_tail(>link, >requests);
 
-   request->emitted_jiffies = jiffies;
-   request->previous_seqno = timeline->last_pending_seqno;
timeline->last_pending_seqno = request->fence.seqno;
i915_gem_active_set(>last_request, request);
-   list_add_tail(>link, >requests);
+
list_add_tail(>ring_link, >request_list);
+   request->emitted_jiffies = jiffies;
 
i915_gem_mark_busy(engine);
 
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 7e65b415c535..594676363056 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -77,22 +77,26 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
 
 static void irq_enable(struct intel_engine_cs *engine)
 {
+   unsigned long flags;
+
/* Enabling the IRQ may miss the generation of the interrupt, but
 * we still need to force the barrier before reading the seqno,
 * just in case.
 */
engine->breadcrumbs.irq_posted = true;
 
-   spin_lock_irq(>i915->irq_lock);
+   spin_lock_irqsave(>i915->irq_lock, flags);
engine->irq_enable(engine);
-   spin_unlock_irq(>i915->irq_lock);
+   spin_unlock_irqrestore(>i915->irq_lock, flags);
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
 {
-   spin_lock_irq(>i915->irq_lock);
+   unsigned long flags;
+
+   spin_lock_irqsave(>i915->irq_lock, flags);
engine->irq_disable(engine);
-   spin_unlock_irq(>i915->irq_lock);
+   spin_unlock_irqrestore(>i915->irq_lock, flags);
 
engine->breadcrumbs.irq_posted = false;
 }
-- 
2.9.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/gvt: Implement WaForceWakeRenderDuringMmioTLBInvalidate

2016-10-20 Thread Ville Syrjälä
On Thu, Oct 20, 2016 at 12:57:31PM +0200, Arkadiusz Hiler wrote:
> When invalidating RCS TLB the device can enter RC6 state interrupting
> the process, therefore the need for render forcewake for the whole
> procedure.
> 
> This WA is needed for all production SKL SKUs.
> 
> References: HSD#2136899, HSD#1404391274
> Cc: Mika Kuoppala 
> Cc: Zhenyu Wang 
> Signed-off-by: Arkadiusz Hiler 
> ---
>  drivers/gpu/drm/i915/gvt/render.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/render.c 
> b/drivers/gpu/drm/i915/gvt/render.c
> index f54ab85..f5000ea 100644
> --- a/drivers/gpu/drm/i915/gvt/render.c
> +++ b/drivers/gpu/drm/i915/gvt/render.c
> @@ -134,11 +134,22 @@ static void handle_tlb_pending_event(struct intel_vgpu 
> *vgpu, int ring_id)
>  
>   reg = _MMIO(regs[ring_id]);

Random drive by comment:
You should add the registers to i915_reg.h properly so that we don't get
this ugly _MMIO() stuff sprinkled all over the place.

>  
> + /* WaForceWakeRenderDuringMmioTLBInvalidate:skl
> +  * we need to put a forcewake when invalidating RCS TLB caches,
> +  * otherwise device can go to RC6 state and interrupt invalidation
> +  * process */
> + if (IS_SKYLAKE(dev_priv) && ring_id == RCS)
> + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_RENDER);
> +
>   I915_WRITE(reg, 0x1);
>  
>   if (wait_for_atomic((I915_READ(reg) == 0), 50))
>   gvt_err("timeout in invalidate ring (%d) tlb\n", ring_id);
>  
> + if (IS_SKYLAKE(dev_priv) && ring_id == RCS)
> + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_RENDER);
> +
> +
>   gvt_dbg_core("invalidate TLB for ring %d\n", ring_id);
>  }
>  
> -- 
> 2.7.4
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm: Complete CEA modedb(VIC 1-107)

2016-10-20 Thread Sharma, Shashank

Regards

Shashank


On 10/20/2016 8:00 PM, Alex Deucher wrote:

On Thu, Oct 20, 2016 at 6:28 AM, Shashank Sharma
 wrote:

The CEA-861-F spec defines new 4k video modes to be used with
HDMI 2.0 EDIDs. These modes start at VIC=93 and go all the
way to VIC=107.

Our existing CEA modedb contains only 64 modes (VIC=1 to VIC=64). Now
to be able to parse 4k modes using the existing techniques, we have
to complete the modedb (VIC=65 onwards).

This patch adds:
- Timings for existing CEA video modes (from VIC=65 till VIC=92)
- Newly added 4k modes (from VIC=93 to VIC=107).

Signed-off-by: Shashank Sharma 
Signed-off-by: Sonika Jindal 

Cc: Joes Abreu 

Isn't this the same as this patch:
https://lists.freedesktop.org/archives/dri-devel/2016-May/107463.html
and I think even a previous one?  Anyway, as long as one of the
patches makes it in, I'm happy.

Reviewed-by: Alex Deucher 

Thanks for the review, Alex.
It looks like that patch was similar, but one significant difference is
that in a previous patch series we implemented end-to-end aspect ratio
support for the drm layer, and then added the new aspect ratios as per the
CEA-861 specs. Otherwise there would be no use in just adding the new
aspect ratios.

https://patchwork.kernel.org/patch/9379063/

I was waiting for that series to get merged before we could use the
aspect ratios mentioned in the new 4k modes.


Regards
Shashank

---
  drivers/gpu/drm/drm_edid.c | 231 +
  1 file changed, 231 insertions(+)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 95de47b..0b97a1b 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -994,6 +994,237 @@ struct minimode {
2492, 2640, 0, 1080, 1084, 1089, 1125, 0,
DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+   /* 65 - 1280x720@24Hz */
+   { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 59400, 1280, 3040,
+  3080, 3300, 0, 720, 725, 730, 750, 0,
+  DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+ .vrefresh = 24, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+   /* 66 - 1280x720@25Hz */
+   { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 3700,
+  3740, 3960, 0, 720, 725, 730, 750, 0,
+  DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+ .vrefresh = 25, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+   /* 67 - 1280x720@30Hz */
+   { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 3040,
+  3080, 3300, 0, 720, 725, 730, 750, 0,
+  DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+ .vrefresh = 30, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+   /* 68 - 1280x720@50Hz */
+   { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 1720,
+  1760, 1980, 0, 720, 725, 730, 750, 0,
+  DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+ .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+   /* 69 - 1280x720@60Hz */
+   { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 1390,
+  1430, 1650, 0, 720, 725, 730, 750, 0,
+  DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+ .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+   /* 70 - 1280x720@100Hz */
+   { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 148500, 1280, 1720,
+  1760, 1980, 0, 720, 725, 730, 750, 0,
+  DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+ .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+   /* 71 - 1280x720@120Hz */
+   { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 148500, 1280, 1390,
+  1430, 1650, 0, 720, 725, 730, 750, 0,
+  DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+ .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+   /* 72 - 1920x1080@24Hz */
+   { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 74250, 1920, 2558,
+  2602, 2750, 0, 1080, 1084, 1089, 1125, 0,
+  DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+ .vrefresh = 24, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+   /* 73 - 1920x1080@25Hz */
+   { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 74250, 1920, 2448,
+  2492, 2640, 0, 1080, 1084, 1089, 1125, 0,
+  DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+ .vrefresh = 25, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27, },
+   /* 74 - 1920x1080@30Hz */
+   { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 74250, 1920, 2008,
+  2052, 2200, 0, 1080, 1084, 1089, 1125, 0,
+  

[Intel-gfx] [PATCH 32/41] drm/i915: Rename ->emit_request to ->emit_breadcrumb

2016-10-20 Thread Chris Wilson
Now that the emission of the request tail and its submission to hardware
are two separate steps, engine->emit_request() is confusing.
engine->emit_request() is called to emit the breadcrumb commands for the
request into the ring, name it such (engine->emit_breadcrumb).

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem_request.c |  4 ++--
 drivers/gpu/drm/i915/intel_lrc.c| 10 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 16 
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 5c01b9548e72..5f643e082f2d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -685,8 +685,8 @@ void __i915_add_request(struct drm_i915_gem_request 
*request, bool flush_caches)
request->postfix = ring->tail;
 
/* Not allowed to fail! */
-   ret = engine->emit_request(request);
-   WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret);
+   ret = engine->emit_breadcrumb(request);
+   WARN(ret, "(%s)->emit_breadcrumb failed: %d!\n", engine->name, ret);
 
/* Sanity check that the reserved size was large enough. */
ret = ring->tail - request_start;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e0a9bf81774b..57dba458f185 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -440,7 +440,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
if (last)
/* WaIdleLiteRestore:bdw,skl
 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
-* as we resubmit the request. See gen8_emit_request()
+* as we resubmit the request. See gen8_emit_breadcrumb()
 * for where we prepare the padding after the end of the
 * request.
 */
@@ -1567,7 +1567,7 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs 
*engine)
  * restore with HEAD==TAIL (WaIdleLiteRestore).
  */
 
-static int gen8_emit_request(struct drm_i915_gem_request *request)
+static int gen8_emit_breadcrumb(struct drm_i915_gem_request *request)
 {
struct intel_ring *ring = request->ring;
int ret;
@@ -1590,7 +1590,7 @@ static int gen8_emit_request(struct drm_i915_gem_request 
*request)
return intel_logical_ring_advance(request);
 }
 
-static int gen8_emit_request_render(struct drm_i915_gem_request *request)
+static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request)
 {
struct intel_ring *ring = request->ring;
int ret;
@@ -1694,7 +1694,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
*engine)
engine->init_hw = gen8_init_common_ring;
engine->reset_hw = reset_common_ring;
engine->emit_flush = gen8_emit_flush;
-   engine->emit_request = gen8_emit_request;
+   engine->emit_breadcrumb = gen8_emit_breadcrumb;
engine->submit_request = execlists_submit_request;
 
engine->irq_enable = gen8_logical_ring_enable_irq;
@@ -1816,7 +1816,7 @@ int logical_render_ring_init(struct intel_engine_cs 
*engine)
engine->init_hw = gen8_init_render_ring;
engine->init_context = gen8_init_rcs_context;
engine->emit_flush = gen8_emit_flush_render;
-   engine->emit_request = gen8_emit_request_render;
+   engine->emit_breadcrumb = gen8_emit_breadcrumb_render;
 
ret = intel_engine_create_scratch(engine, 4096);
if (ret)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 65ffb1ceab3b..6a0c75c5833b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1330,7 +1330,7 @@ static void i9xx_submit_request(struct 
drm_i915_gem_request *request)
intel_ring_offset(request->ring, request->tail));
 }
 
-static int i9xx_emit_request(struct drm_i915_gem_request *req)
+static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req)
 {
struct intel_ring *ring = req->ring;
int ret;
@@ -1351,14 +1351,14 @@ static int i9xx_emit_request(struct 
drm_i915_gem_request *req)
 }
 
 /**
- * gen6_sema_emit_request - Update the semaphore mailbox registers
+ * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers
  *
  * @request - request to write to the ring
  *
  * Update the mailbox registers in the *other* rings with the current seqno.
  * This acts like a signal in the canonical semaphore.
  */
-static int gen6_sema_emit_request(struct drm_i915_gem_request *req)
+static int gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req)
 {
int ret;
 
@@ -1366,10 +1366,10 @@ static int gen6_sema_emit_request(struct 
drm_i915_gem_request 

[Intel-gfx] [PATCH 40/41] drm/i915: Enable userspace to opt-out of implicit fencing

2016-10-20 Thread Chris Wilson
Userspace is faced with a dilemma. The kernel requires implicit fencing
to manage resource usage (we always must wait for the GPU to finish
before releasing its PTE) and for third parties. However, userspace may
wish to avoid this serialisation if it is either using explicit fencing
between parties and wants more fine-grained access to buffers (e.g. it
may partition the buffer between uses and track fences on ranges rather
than the implicit fences tracking the whole object). It follows that
userspace needs a mechanism to avoid the kernel's serialisation on its
implicit fences before execbuf execution.

The next question is whether this is an object, execbuf or context flag.
Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on
shared winsys buffers, but implicit fencing on internal surfaces)
require a per-object level flag. Given that this flag needs to be set
only once for the lifetime of the object, this reduces the convenience of
having an execbuf or context level flag (and avoids having multiple
pieces of uABI controlling the same feature).

Incorrect use of this flag will result in rendering corruption and GPU
hangs - but will not result in use-after-free or similar resource
tracking issues.

Serious caveat: write ordering is not strictly correct after setting
this flag on a render target on multiple engines. This affects all
subsequent GEM operations (execbuf, set-domain, pread) and shared
dma-buf operations. A fix is possible - but costly (both in terms of
further ABI changes and runtime overhead).

Testcase: igt/gem_exec_async
Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_drv.c|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +++
 include/uapi/drm/i915_drm.h| 27 ++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a32e128af45d..abfc5007f170 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -339,6 +339,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_HANDLE_LUT:
case I915_PARAM_HAS_COHERENT_PHYS_GTT:
case I915_PARAM_HAS_EXEC_SOFTPIN:
+   case I915_PARAM_HAS_EXEC_ASYNC:
/* For the time being all of these are always true;
 * if some supported hardware does not have one of these
 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a249486d0e3f..775c946bb9fb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1118,6 +1118,9 @@ i915_gem_execbuffer_move_to_gpu(struct 
drm_i915_gem_request *req,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
 
+   if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
+   continue;
+
ret = i915_gem_request_await_object
(req, obj, obj->base.pending_write_domain);
if (ret)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 03725fe89859..a2fa511b46b3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -388,6 +388,10 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_POOLED_EU38
 #define I915_PARAM_MIN_EU_IN_POOL   39
 #define I915_PARAM_MMAP_GTT_VERSION 40
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
+ * synchronisation with implicit fencing on individual objects.
+ */
+#define I915_PARAM_HAS_EXEC_ASYNC   41
 
 typedef struct drm_i915_getparam {
__s32 param;
@@ -729,8 +733,29 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
 #define EXEC_OBJECT_PINNED  (1<<4)
 #define EXEC_OBJECT_PAD_TO_SIZE (1<<5)
+/* The kernel implicitly tracks GPU activity on all GEM objects, and
+ * synchronises operations with outstanding rendering. This includes
+ * rendering on other devices if exported via dma-buf. However, sometimes
+ * this tracking is too coarse and the user knows better. For example,
+ * if the object is split into non-overlapping ranges shared between different
+ * clients or engines (i.e. suballocating objects), the implicit tracking
+ * by kernel assumes that each operation affects the whole object rather
+ * than an individual range, causing needless synchronisation between clients.
+ * The kernel will also forgo any CPU cache flushes prior to rendering from
+ * the object as the client is expected to be also handling such domain
+ * tracking.
+ *
+ * The kernel maintains the implicit tracking in order to manage resources
+ * used by the GPU - this flag only disables the 

[Intel-gfx] [PATCH 39/41] drm/i915: Enable multiple timelines

2016-10-20 Thread Chris Wilson
With the infrastructure converted over to tracking multiple timelines in
the GEM API whilst preserving the efficiency of using a single execution
timeline internally, we can now assign a separate timeline to every
context with full-ppgtt.

v2: Add a comment to indicate the xfer between timelines upon submission.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  | 10 ++
 drivers/gpu/drm/i915/i915_gem.c  | 10 +++---
 drivers/gpu/drm/i915/i915_gem_context.c  |  4 +--
 drivers/gpu/drm/i915/i915_gem_evict.c| 11 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.c  | 19 ++
 drivers/gpu/drm/i915/i915_gem_gtt.h  |  4 ++-
 drivers/gpu/drm/i915/i915_gem_request.c  | 61 +---
 drivers/gpu/drm/i915/i915_gem_timeline.c |  1 +
 drivers/gpu/drm/i915/i915_gem_timeline.h |  3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  5 ---
 10 files changed, 77 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c48e4fd4f8b0..ff7c085642f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3536,6 +3536,16 @@ static inline void i915_gem_context_put(struct 
i915_gem_context *ctx)
kref_put(>ref, i915_gem_context_free);
 }
 
+static inline struct intel_timeline *
+i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
+struct intel_engine_cs *engine)
+{
+   struct i915_address_space *vm;
+
+   vm = ctx->ppgtt ? >ppgtt->base : >i915->ggtt.base;
+   return >timeline.engine[engine->id];
+}
+
 static inline bool i915_gem_context_is_default(const struct i915_gem_context 
*c)
 {
return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 38f7bad015bd..e520998d3601 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2567,12 +2567,9 @@ i915_gem_find_active_request(struct intel_engine_cs 
*engine)
 * not need an engine->irq_seqno_barrier() before the seqno reads.
 */
list_for_each_entry(request, >timeline->requests, link) {
-   if (i915_gem_request_completed(request))
+   if (__i915_gem_request_completed(request))
continue;
 
-   if (!i915_sw_fence_done(>submit))
-   break;
-
return request;
}
 
@@ -2600,6 +2597,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs 
*engine)
 {
struct drm_i915_gem_request *request;
struct i915_gem_context *incomplete_ctx;
+   struct intel_timeline *timeline;
bool ring_hung;
 
if (engine->irq_seqno_barrier)
@@ -2638,6 +2636,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs 
*engine)
list_for_each_entry_continue(request, >timeline->requests, link)
if (request->ctx == incomplete_ctx)
reset_request(request);
+
+   timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
+   list_for_each_entry(request, >requests, link)
+   reset_request(request);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index d3118db244c4..461aece6c5bd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -365,9 +365,9 @@ i915_gem_create_context(struct drm_device *dev,
return ctx;
 
if (USES_FULL_PPGTT(dev)) {
-   struct i915_hw_ppgtt *ppgtt =
-   i915_ppgtt_create(to_i915(dev), file_priv);
+   struct i915_hw_ppgtt *ppgtt;
 
+   ppgtt = i915_ppgtt_create(to_i915(dev), file_priv, ctx->name);
if (IS_ERR(ppgtt)) {
DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 PTR_ERR(ppgtt));
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index 79b964152cd9..bd08814b015c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -33,14 +33,17 @@
 #include "intel_drv.h"
 #include "i915_trace.h"
 
-static bool
-gpu_is_idle(struct drm_i915_private *dev_priv)
+static bool ggtt_is_idle(struct drm_i915_private *dev_priv)
 {
+   struct i915_ggtt *ggtt = _priv->ggtt;
struct intel_engine_cs *engine;
enum intel_engine_id id;
 
for_each_engine(engine, dev_priv, id) {
-   if (intel_engine_is_active(engine))
+   struct intel_timeline *tl;
+
+   tl = >base.timeline.engine[engine->id];
+   if (i915_gem_active_isset(>last_request))
return false;
}
 
@@ -154,7 +157,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
if 

[Intel-gfx] [PATCH 25/41] drm/i915: Use lockless object free

2016-10-20 Thread Chris Wilson
Having moved the locked phase of freeing an object to a separate worker,
we can now declare to the core that we only need the unlocked variant of
driver->gem_free_object, and can use the simple unreference internally.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_drv.c |  2 +-
 drivers/gpu/drm/i915/i915_drv.h | 10 +-
 drivers/gpu/drm/i915/i915_gem.c | 30 +++---
 drivers/gpu/drm/i915/i915_gem_tiling.c  |  2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  4 ++--
 drivers/gpu/drm/i915/intel_display.c|  6 +++---
 drivers/gpu/drm/i915/intel_overlay.c|  4 ++--
 drivers/gpu/drm/i915/intel_pm.c |  2 +-
 8 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index dd9cea8ec25c..c3df229346bd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2573,7 +2573,7 @@ static struct drm_driver driver = {
.set_busid = drm_pci_set_busid,
 
.gem_close_object = i915_gem_close_object,
-   .gem_free_object = i915_gem_free_object,
+   .gem_free_object_unlocked = i915_gem_free_object,
.gem_vm_ops = _gem_vm_ops,
 
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 446fa428c764..061bd3997b3e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2391,19 +2391,12 @@ __attribute__((nonnull))
 static inline void
 i915_gem_object_put(struct drm_i915_gem_object *obj)
 {
-   drm_gem_object_unreference(>base);
+   __drm_gem_object_unreference(>base);
 }
 
 __deprecated
 extern void drm_gem_object_unreference(struct drm_gem_object *);
 
-__attribute__((nonnull))
-static inline void
-i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
-{
-   drm_gem_object_unreference_unlocked(>base);
-}
-
 __deprecated
 extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
 
@@ -2498,7 +2491,6 @@ static inline struct i915_vma *i915_vma_get(struct 
i915_vma *vma)
 
 static inline void i915_vma_put(struct i915_vma *vma)
 {
-   lockdep_assert_held(>vm->dev->struct_mutex);
i915_gem_object_put(vma->obj);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 55813f9cea7f..db8b45772e6f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -616,7 +616,7 @@ i915_gem_create(struct drm_file *file,
 
ret = drm_gem_handle_create(file, >base, );
/* drop reference from allocate - handle holds it now */
-   i915_gem_object_put_unlocked(obj);
+   i915_gem_object_put(obj);
if (ret)
return ret;
 
@@ -1110,7 +1110,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
i915_gem_object_unpin_pages(obj);
 out:
-   i915_gem_object_put_unlocked(obj);
+   i915_gem_object_put(obj);
return ret;
 }
 
@@ -1443,7 +1443,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
i915_gem_object_unpin_pages(obj);
 err:
-   i915_gem_object_put_unlocked(obj);
+   i915_gem_object_put(obj);
return ret;
 }
 
@@ -1519,7 +1519,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
   MAX_SCHEDULE_TIMEOUT,
   to_rps_client(file));
if (err)
-   goto out_unlocked;
+   goto out;
 
/* Flush and acquire obj->pages so that we are coherent through
 * direct access in memory with previous cached writes through
@@ -1531,11 +1531,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
 */
err = i915_gem_object_pin_pages(obj);
if (err)
-   goto out_unlocked;
+   goto out;
 
err = i915_mutex_lock_interruptible(dev);
if (err)
-   goto out_pages;
+   goto out_unpin;
 
if (read_domains & I915_GEM_DOMAIN_GTT)
err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
@@ -1550,10 +1550,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
if (write_domain != 0)
intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
 
-out_pages:
+out_unpin:
i915_gem_object_unpin_pages(obj);
-out_unlocked:
-   i915_gem_object_put_unlocked(obj);
+out:
+   i915_gem_object_put(obj);
return err;
 }
 
@@ -1584,7 +1584,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void 
*data,
}
}
 
-   i915_gem_object_put_unlocked(obj);
+   i915_gem_object_put(obj);
return err;
 }
 
@@ -1630,7 +1630,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 * pages from.
 */
if (!obj->base.filp) {
- 

[Intel-gfx] [PATCH 26/41] drm/i915: Move GEM activity tracking into a common struct reservation_object

2016-10-20 Thread Chris Wilson
In preparation to support many distinct timelines, we need to expand the
activity tracking on the GEM object to handle more than just a request
per engine. We already use the struct reservation_object on the dma-buf
to handle many fence contexts, so integrating that into the GEM object
itself is the preferred solution. (For example, we can now share the same
reservation_object between every consumer/producer using this buffer and
skip the manual import/export via dma-buf.)

v2: Reimplement busy-ioctl (by walking the reservation object), postpone
the ABI change for another day. Similarly use the reservation object to
find the last_write request (if active and from i915) for choosing
display CS flips.

Caveats:

 * busy-ioctl: busy-ioctl only reports on the native fences, it will not
warn of stalls (in set-domain-ioctl, pread/pwrite etc) if the object is
being rendered to by external fences. It also will not report the same
busy state as wait-ioctl (or polling on the dma-buf) in the same
circumstances. On the plus side, it does retain reporting of which
*i915* engines are engaged with this object.

 * non-blocking atomic modesets take a step backwards as the wait for
render completion blocks the ioctl. This is fixed in a subsequent
patch to use a fence instead for awaiting on the rendering, see
"drm/i915: Restore nonblocking awaits for modesetting"

 * dynamic array manipulation for shared-fences in reservation is slower
than the previous lockless static assignment (e.g. gem_exec_lut_handle
runtime on ivb goes from 42s to 66s), mainly due to atomic operations
(maintaining the fence refcounts).

 * loss of object-level retirement callbacks, emulated by VMA retirement
tracking.

 * minor loss of object-level last activity information from debugfs,
could be replaced with per-vma information if desired

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  15 +-
 drivers/gpu/drm/i915/i915_drv.h|  62 +++
 drivers/gpu/drm/i915/i915_gem.c| 266 -
 drivers/gpu/drm/i915/i915_gem_batch_pool.c |  11 +-
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |  53 +-
 drivers/gpu/drm/i915/i915_gem_dmabuf.h |  45 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  55 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c|  32 
 drivers/gpu/drm/i915/i915_gem_gtt.h|   1 +
 drivers/gpu/drm/i915/i915_gem_request.c|  48 +++---
 drivers/gpu/drm/i915/i915_gem_request.h|  37 +---
 drivers/gpu/drm/i915/i915_gpu_error.c  |   6 +-
 drivers/gpu/drm/i915/intel_atomic_plane.c  |   2 -
 drivers/gpu/drm/i915/intel_display.c   | 114 +++--
 drivers/gpu/drm/i915/intel_drv.h   |   3 -
 15 files changed, 213 insertions(+), 537 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_dmabuf.h

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 2780b07e04c1..b67db57bc135 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -136,11 +136,10 @@ describe_obj(struct seq_file *m, struct 
drm_i915_gem_object *obj)
struct i915_vma *vma;
unsigned int frontbuffer_bits;
int pin_count = 0;
-   enum intel_engine_id id;
 
lockdep_assert_held(>base.dev->struct_mutex);
 
-   seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x [ ",
+   seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s",
   >base,
   get_active_flag(obj),
   get_pin_flag(obj),
@@ -149,14 +148,7 @@ describe_obj(struct seq_file *m, struct 
drm_i915_gem_object *obj)
   get_pin_mapped_flag(obj),
   obj->base.size / 1024,
   obj->base.read_domains,
-  obj->base.write_domain);
-   for_each_engine(engine, dev_priv, id)
-   seq_printf(m, "%x ",
-  i915_gem_active_get_seqno(>last_read[id],
-
>base.dev->struct_mutex));
-   seq_printf(m, "] %x %s%s%s",
-  i915_gem_active_get_seqno(>last_write,
->base.dev->struct_mutex),
+  obj->base.write_domain,
   i915_cache_level_str(dev_priv, obj->cache_level),
   obj->mm.dirty ? " dirty" : "",
   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -187,8 +179,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
if (obj->stolen)
seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
 
-   engine = i915_gem_active_get_engine(>last_write,
-   _priv->drm.struct_mutex);
+   engine = i915_gem_object_last_write_engine(obj);
if (engine)
seq_printf(m, " (%s)", engine->name);
 
diff --git 

[Intel-gfx] [PATCH 14/41] drm/i915: Markup GEM API with lockdep asserts

2016-10-20 Thread Chris Wilson
Add lockdep_assert_held(struct_mutex) to the API preamble of the
internal GEM interfaces.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_gem.c  | 21 +
 drivers/gpu/drm/i915/i915_gem_evict.c|  5 -
 drivers/gpu/drm/i915/i915_gem_gtt.c  |  2 ++
 drivers/gpu/drm/i915/i915_gem_render_state.c |  2 ++
 drivers/gpu/drm/i915/i915_gem_request.c  |  6 ++
 6 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e267e20bdcdb..0897f43e7796 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3419,6 +3419,7 @@ int __must_check i915_vma_put_fence(struct i915_vma *vma);
 static inline bool
 i915_vma_pin_fence(struct i915_vma *vma)
 {
+   lockdep_assert_held(&vma->vm->dev->struct_mutex);
if (vma->fence) {
vma->fence->pin_count++;
return true;
@@ -3437,6 +3438,7 @@ i915_vma_pin_fence(struct i915_vma *vma)
 static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
+   lockdep_assert_held(&vma->vm->dev->struct_mutex);
if (vma->fence) {
GEM_BUG_ON(vma->fence->pin_count <= 0);
vma->fence->pin_count--;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ee8d1405013e..d596b1f9e969 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -104,6 +104,8 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
int ret;
 
+   might_sleep();
+
if (!i915_reset_in_progress(error))
return 0;
 
@@ -2336,6 +2338,8 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
const struct drm_i915_gem_object_ops *ops = obj->ops;
 
+   lockdep_assert_held(&obj->base.dev->struct_mutex);
+
if (obj->pages == NULL)
return 0;
 
@@ -2512,6 +2516,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
const struct drm_i915_gem_object_ops *ops = obj->ops;
int ret;
 
+   lockdep_assert_held(&obj->base.dev->struct_mutex);
+
if (obj->pages)
return 0;
 
@@ -2793,6 +2799,8 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
struct intel_engine_cs *engine;
enum intel_engine_id id;
 
+   lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
i915_gem_retire_requests(dev_priv);
 
for_each_engine(engine, dev_priv, id)
@@ -3034,6 +3042,8 @@ int i915_vma_unbind(struct i915_vma *vma)
unsigned long active;
int ret;
 
+   lockdep_assert_held(>base.dev->struct_mutex);
+
/* First wait upon any activity as retiring the request may
 * have side-effects such as unpinning or even unbinding this vma.
 */
@@ -3430,6 +3440,7 @@ i915_gem_object_set_to_gtt_domain(struct 
drm_i915_gem_object *obj, bool write)
int ret;
 
lockdep_assert_held(>base.dev->struct_mutex);
+
ret = i915_gem_object_wait(obj,
   I915_WAIT_INTERRUPTIBLE |
   I915_WAIT_LOCKED |
@@ -3508,6 +3519,8 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
struct i915_vma *vma;
int ret = 0;
 
+   lockdep_assert_held(&obj->base.dev->struct_mutex);
+
if (obj->cache_level == cache_level)
goto out;
 
@@ -3712,6 +3725,8 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
u32 old_read_domains, old_write_domain;
int ret;
 
+   lockdep_assert_held(&obj->base.dev->struct_mutex);
+
/* Mark the pin_display early so that we account for the
 * display coherency whilst setting up the cache domains.
 */
@@ -3779,6 +3794,8 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
 void
 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
+   lockdep_assert_held(&vma->vm->dev->struct_mutex);
+
if (WARN_ON(vma->obj->pin_display == 0))
return;
 
@@ -3808,6 +3825,7 @@ i915_gem_object_set_to_cpu_domain(struct 
drm_i915_gem_object *obj, bool write)
int ret;
 
lockdep_assert_held(>base.dev->struct_mutex);
+
ret = i915_gem_object_wait(obj,
   I915_WAIT_INTERRUPTIBLE |
   I915_WAIT_LOCKED |
@@ -3963,6 +3981,7 @@ int __i915_vma_do_pin(struct i915_vma *vma,
unsigned int bound = vma->flags;
int ret;
 
+   lockdep_assert_held(&vma->vm->dev->struct_mutex);
GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
 
@@ -4004,6 +4023,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
struct i915_vma *vma;
int ret;
 
+   

[Intel-gfx] [PATCH 21/41] drm/i915: Implement pread without struct-mutex

2016-10-20 Thread Chris Wilson
We only need struct_mutex within pread for a brief window where we need
to serialise with rendering and control our cache domains. Elsewhere we
can rely on the backing storage being pinned, and forgive userspace any
races against us.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem.c | 365 +---
 1 file changed, 157 insertions(+), 208 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bfa80302e2d5..ab119ea49634 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -63,13 +63,13 @@ static bool cpu_write_needs_clflush(struct 
drm_i915_gem_object *obj)
 }
 
 static int
-insert_mappable_node(struct drm_i915_private *i915,
+insert_mappable_node(struct i915_ggtt *ggtt,
  struct drm_mm_node *node, u32 size)
 {
memset(node, 0, sizeof(*node));
-   return drm_mm_insert_node_in_range_generic(>ggtt.base.mm, node,
-  size, 0, 0, 0,
-  i915->ggtt.mappable_end,
+   return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
+  size, 0, -1,
+  0, ggtt->mappable_end,
   DRM_MM_SEARCH_DEFAULT,
   DRM_MM_CREATE_DEFAULT);
 }
@@ -820,32 +820,6 @@ int i915_gem_obj_prepare_shmem_write(struct 
drm_i915_gem_object *obj,
return ret;
 }
 
-/* Per-page copy function for the shmem pread fastpath.
- * Flushes invalid cachelines before reading the target if
- * needs_clflush is set. */
-static int
-shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
-char __user *user_data,
-bool page_do_bit17_swizzling, bool needs_clflush)
-{
-   char *vaddr;
-   int ret;
-
-   if (unlikely(page_do_bit17_swizzling))
-   return -EINVAL;
-
-   vaddr = kmap_atomic(page);
-   if (needs_clflush)
-   drm_clflush_virt_range(vaddr + shmem_page_offset,
-  page_length);
-   ret = __copy_to_user_inatomic(user_data,
- vaddr + shmem_page_offset,
- page_length);
-   kunmap_atomic(vaddr);
-
-   return ret ? -EFAULT : 0;
-}
-
 static void
 shmem_clflush_swizzled_range(char *addr, unsigned long length,
 bool swizzled)
@@ -871,7 +845,7 @@ shmem_clflush_swizzled_range(char *addr, unsigned long 
length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_slow(struct page *page, int offset, int length,
 char __user *user_data,
 bool page_do_bit17_swizzling, bool needs_clflush)
 {
@@ -880,61 +854,130 @@ shmem_pread_slow(struct page *page, int 
shmem_page_offset, int page_length,
 
vaddr = kmap(page);
if (needs_clflush)
-   shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
-page_length,
+   shmem_clflush_swizzled_range(vaddr + offset, length,
 page_do_bit17_swizzling);
 
if (page_do_bit17_swizzling)
-   ret = __copy_to_user_swizzled(user_data,
- vaddr, shmem_page_offset,
- page_length);
+   ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
else
-   ret = __copy_to_user(user_data,
-vaddr + shmem_page_offset,
-page_length);
+   ret = __copy_to_user(user_data, vaddr + offset, length);
kunmap(page);
 
return ret ? - EFAULT : 0;
 }
 
-static inline unsigned long
-slow_user_access(struct io_mapping *mapping,
-uint64_t page_base, int page_offset,
-char __user *user_data,
-unsigned long length, bool pwrite)
+static int
+shmem_pread(struct page *page, int offset, int length, char __user *user_data,
+   bool page_do_bit17_swizzling, bool needs_clflush)
+{
+   int ret;
+
+   ret = -ENODEV;
+   if (!page_do_bit17_swizzling) {
+   char *vaddr = kmap_atomic(page);
+
+   if (needs_clflush)
+   drm_clflush_virt_range(vaddr + offset, length);
+   ret = __copy_to_user_inatomic(user_data, vaddr + offset, 
length);
+   kunmap_atomic(vaddr);
+   }
+   if (ret == 0)
+   

[Intel-gfx] [PATCH 37/41] drm/i915: Reserve space in the global seqno during request allocation

2016-10-20 Thread Chris Wilson
A restriction on our global seqnos is that they cannot wrap, and that we
cannot use the value 0. This allows us to detect when a request has not
yet been submitted (its global seqno is still 0), and ensures that
hardware semaphores are monotonic as required by older hardware. To
meet these restrictions when we defer the assignment of the global
seqno, we must check that we have an available slot in the global seqno
space during request construction. If that test fails, we wait for all
requests to be completed and reset the hardware back to 0.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 10 ++--
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-
 drivers/gpu/drm/i915/i915_gem.c  |  7 ++-
 drivers/gpu/drm/i915/i915_gem_request.c  | 86 +---
 drivers/gpu/drm/i915/i915_gem_timeline.h |  2 +-
 5 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index a11c3e84d997..5ca5b63c0f71 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -552,7 +552,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
seq_printf(m, "Flip queued on %s at seqno %x, 
next seqno %x [current breadcrumb %x], completed? %d\n",
   engine->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
-  
dev_priv->gt.global_timeline.next_seqno,
+  
atomic_read(_priv->gt.global_timeline.next_seqno),
   intel_engine_get_seqno(engine),
   
i915_gem_request_completed(work->flip_queued_req));
} else
@@ -1046,7 +1046,7 @@ i915_next_seqno_get(void *data, u64 *val)
 {
struct drm_i915_private *dev_priv = data;
 
-   *val = READ_ONCE(dev_priv->gt.global_timeline.next_seqno);
+   *val = atomic_read(&dev_priv->gt.global_timeline.next_seqno);
return 0;
 }
 
@@ -2275,8 +2275,8 @@ static int i915_rps_boost_info(struct seq_file *m, void 
*data)
struct drm_file *file;
 
seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
-   seq_printf(m, "GPU busy? %s [%x]\n",
-  yesno(dev_priv->gt.awake), dev_priv->gt.active_engines);
+   seq_printf(m, "GPU busy? %s [%d requests]\n",
+  yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
seq_printf(m, "Frequency requested %d\n",
   intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
@@ -2311,7 +2311,7 @@ static int i915_rps_boost_info(struct seq_file *m, void 
*data)
 
if (INTEL_GEN(dev_priv) >= 6 &&
dev_priv->rps.enabled &&
-   dev_priv->gt.active_engines) {
+   dev_priv->gt.active_requests) {
u32 rpup, rpupei;
u32 rpdown, rpdownei;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 93061993b0fb..c48e4fd4f8b0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2081,6 +2081,7 @@ struct drm_i915_private {
 
struct list_head timelines;
struct i915_gem_timeline global_timeline;
+   u32 active_requests;
 
/**
 * Is the GPU currently considered idle, or busy executing
@@ -2089,7 +2090,6 @@ struct drm_i915_private {
 * In order to reduce the effect on performance, there
 * is a slight delay before we do so.
 */
-   unsigned int active_engines;
bool awake;
 
/**
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5053fda016d8..38f7bad015bd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2691,8 +2691,6 @@ static void i915_gem_cleanup_engine(struct 
intel_engine_cs *engine)
memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
spin_unlock(>execlist_lock);
}
-
-   engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
@@ -2749,7 +2747,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
if (!READ_ONCE(dev_priv->gt.awake))
return;
 
-   if (READ_ONCE(dev_priv->gt.active_engines))
+   if (READ_ONCE(dev_priv->gt.active_requests))
return;
 
rearm_hangcheck =
@@ -2763,7 +2761,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
goto out_rearm;
}
 
-   if (dev_priv->gt.active_engines)

[Intel-gfx] [PATCH 27/41] drm/i915: Restore nonblocking awaits for modesetting

2016-10-20 Thread Chris Wilson
After combining the dma-buf reservation object and the GEM reservation
object, we lost the ability to do a nonblocking wait on the i915 request
(as we blocked upon the reservation object during prepare_fb). We can
instead convert the reservation object into a fence upon which we can
asynchronously wait (including a forced timeout in case the DMA fence is
never signaled).

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/intel_display.c | 82 +++-
 drivers/gpu/drm/i915/intel_drv.h |  2 +
 2 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 13522903ec4e..c7c973b24eba 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14509,12 +14509,33 @@ static void intel_atomic_commit_tail(struct 
drm_atomic_state *state)
 
 static void intel_atomic_commit_work(struct work_struct *work)
 {
-   struct drm_atomic_state *state = container_of(work,
- struct drm_atomic_state,
- commit_work);
+   struct drm_atomic_state *state =
+   container_of(work, struct drm_atomic_state, commit_work);
+
intel_atomic_commit_tail(state);
 }
 
+static int __i915_sw_fence_call
+intel_atomic_commit_ready(struct i915_sw_fence *fence,
+ enum i915_sw_fence_notify notify)
+{
+   struct intel_atomic_state *state =
+   container_of(fence, struct intel_atomic_state, commit_ready);
+
+   switch (notify) {
+   case FENCE_COMPLETE:
+   if (state->base.commit_work.func)
+   queue_work(system_unbound_wq, &state->base.commit_work);
+   break;
+
+   case FENCE_FREE:
+   drm_atomic_state_put(&state->base);
+   break;
+   }
+
+   return NOTIFY_DONE;
+}
+
 static void intel_atomic_track_fbs(struct drm_atomic_state *state)
 {
struct drm_plane_state *old_plane_state;
@@ -14560,11 +14581,14 @@ static int intel_atomic_commit(struct drm_device *dev,
if (ret)
return ret;
 
-   INIT_WORK(>commit_work, intel_atomic_commit_work);
+   drm_atomic_state_get(state);
+   i915_sw_fence_init(_state->commit_ready,
+  intel_atomic_commit_ready);
 
ret = intel_atomic_prepare_commit(dev, state);
if (ret) {
DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
+   i915_sw_fence_commit(_state->commit_ready);
return ret;
}
 
@@ -14575,10 +14599,14 @@ static int intel_atomic_commit(struct drm_device *dev,
intel_atomic_track_fbs(state);
 
drm_atomic_state_get(state);
-   if (nonblock)
-   queue_work(system_unbound_wq, >commit_work);
-   else
+   INIT_WORK(>commit_work,
+ nonblock ? intel_atomic_commit_work : NULL);
+
+   i915_sw_fence_commit(_state->commit_ready);
+   if (!nonblock) {
+   i915_sw_fence_wait(_state->commit_ready);
intel_atomic_commit_tail(state);
+   }
 
return 0;
 }
@@ -14690,20 +14718,22 @@ int
 intel_prepare_plane_fb(struct drm_plane *plane,
   struct drm_plane_state *new_state)
 {
+   struct intel_atomic_state *intel_state =
+   to_intel_atomic_state(new_state->state);
struct drm_device *dev = plane->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_framebuffer *fb = new_state->fb;
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
-   long lret;
-   int ret = 0;
+   int ret;
 
if (!obj && !old_obj)
return 0;
 
if (old_obj) {
struct drm_crtc_state *crtc_state =
-   drm_atomic_get_existing_crtc_state(new_state->state, 
plane->state->crtc);
+   drm_atomic_get_existing_crtc_state(new_state->state,
+  plane->state->crtc);
 
/* Big Hammer, we also need to ensure that any pending
 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
@@ -14716,31 +14746,25 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 * This should only fail upon a hung GPU, in which case we
 * can safely continue.
 */
-   if (needs_modeset(crtc_state))
-   ret = i915_gem_object_wait(old_obj,
-  I915_WAIT_INTERRUPTIBLE |
-  I915_WAIT_LOCKED,
-  MAX_SCHEDULE_TIMEOUT,
- 

[Intel-gfx] [PATCH 33/41] drm/i915: Record space required for breadcrumb emission

2016-10-20 Thread Chris Wilson
In the next patch, we will use deferred breadcrumb emission. That requires
reserving sufficient space in the ringbuffer to emit the breadcrumb, which
first requires us to know how large the breadcrumb is.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem_request.c |  1 +
 drivers/gpu/drm/i915/intel_lrc.c|  6 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 29 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 5f643e082f2d..8a5ae624b125 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -434,6 +434,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 * away, e.g. because a GPU scheduler has deferred it.
 */
req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
+   GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz);
 
if (i915.enable_execlists)
ret = intel_logical_ring_alloc_request_extras(req);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 57dba458f185..8229baebb2b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1590,6 +1590,8 @@ static int gen8_emit_breadcrumb(struct 
drm_i915_gem_request *request)
return intel_logical_ring_advance(request);
 }
 
+static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
+
 static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request)
 {
struct intel_ring *ring = request->ring;
@@ -1621,6 +1623,8 @@ static int gen8_emit_breadcrumb_render(struct 
drm_i915_gem_request *request)
return intel_logical_ring_advance(request);
 }
 
+static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
+
 static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
 {
int ret;
@@ -1695,6 +1699,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
*engine)
engine->reset_hw = reset_common_ring;
engine->emit_flush = gen8_emit_flush;
engine->emit_breadcrumb = gen8_emit_breadcrumb;
+   engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
engine->submit_request = execlists_submit_request;
 
engine->irq_enable = gen8_logical_ring_enable_irq;
@@ -1817,6 +1822,7 @@ int logical_render_ring_init(struct intel_engine_cs 
*engine)
engine->init_context = gen8_init_rcs_context;
engine->emit_flush = gen8_emit_flush_render;
engine->emit_breadcrumb = gen8_emit_breadcrumb_render;
+   engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz;
 
ret = intel_engine_create_scratch(engine, 4096);
if (ret)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6a0c75c5833b..95f8b3b13351 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1350,6 +1350,8 @@ static int i9xx_emit_breadcrumb(struct 
drm_i915_gem_request *req)
return 0;
 }
 
+static const int i9xx_emit_breadcrumb_sz = 4;
+
 /**
  * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers
  *
@@ -1403,6 +1405,8 @@ static int gen8_render_emit_breadcrumb(struct 
drm_i915_gem_request *req)
return 0;
 }
 
+static const int gen8_render_emit_breadcrumb_sz = 8;
+
 /**
  * intel_ring_sync - sync the waiter to the signaller on seqno
  *
@@ -2640,8 +2644,21 @@ static void intel_ring_default_vfuncs(struct 
drm_i915_private *dev_priv,
engine->reset_hw = reset_ring_common;
 
engine->emit_breadcrumb = i9xx_emit_breadcrumb;
-   if (i915.semaphores)
+   engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
+   if (i915.semaphores) {
+   int num_rings;
+
engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
+
+   num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
+   if (INTEL_GEN(dev_priv) >= 8) {
+   engine->emit_breadcrumb_sz += num_rings * 6;
+   } else {
+   engine->emit_breadcrumb_sz += num_rings * 3;
+   if (num_rings & 1)
+   engine->emit_breadcrumb_sz++;
+   }
+   }
engine->submit_request = i9xx_submit_request;
 
if (INTEL_GEN(dev_priv) >= 8)
@@ -2669,9 +2686,17 @@ int intel_init_render_ring_buffer(struct intel_engine_cs 
*engine)
if (INTEL_GEN(dev_priv) >= 8) {
engine->init_context = intel_rcs_ctx_init;
engine->emit_breadcrumb = gen8_render_emit_breadcrumb;
+   engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz;
engine->emit_flush = gen8_render_ring_flush;
-   if (i915.semaphores)
+   

[Intel-gfx] [PATCH 41/41] drm/i915: Support explicit fencing for execbuf

2016-10-20 Thread Chris Wilson
Now that the user can opt-out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.

Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/Kconfig   |  1 +
 drivers/gpu/drm/i915/i915_drv.c|  3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 53 +++---
 include/uapi/drm/i915_drm.h| 36 +++-
 4 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 1c1b19ccb92f..c383684b538f 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -18,6 +18,7 @@ config DRM_I915
select INPUT if ACPI
select ACPI_VIDEO if ACPI
select ACPI_BUTTON if ACPI
+   select SYNC_FILE
help
  Choose this option if you have a system that has "Intel Graphics
  Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index abfc5007f170..4df75e63cf22 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -340,6 +340,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_COHERENT_PHYS_GTT:
case I915_PARAM_HAS_EXEC_SOFTPIN:
case I915_PARAM_HAS_EXEC_ASYNC:
+   case I915_PARAM_HAS_EXEC_FENCE:
/* For the time being all of these are always true;
 * if some supported hardware does not have one of these
 * features this value needs to be provided from
@@ -2529,7 +2530,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, 
DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, 
DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
-   DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, 
DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, 
DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 775c946bb9fb..f60b57b7d677 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
 
 #include 
 #include 
+#include <linux/sync_file.h>
 #include 
 
 #include 
@@ -1596,6 +1597,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct i915_execbuffer_params *params = _master;
const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 dispatch_flags;
+   struct fence *in_fence = NULL;
+   struct sync_file *out_fence = NULL;
+   int out_fence_fd = -1;
int ret;
bool need_relocs;
 
@@ -1639,6 +1643,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
dispatch_flags |= I915_DISPATCH_RS;
}
 
+   if (args->flags & I915_EXEC_FENCE_IN) {
+   in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+   if (!in_fence) {
+   ret = -EINVAL;
+   goto pre_mutex_err;
+   }
+   }
+
+   if (args->flags & I915_EXEC_FENCE_OUT) {
+   out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+   if (out_fence_fd < 0) {
+   ret = out_fence_fd;
+   out_fence_fd = -1;
+   goto pre_mutex_err;
+   }
+   }
+
/* Take a local wakeref for preparing to dispatch the execbuf as
 * we expect to access the hardware fairly frequently in the
 * process. Upon first dispatch, we acquire another prolonged
@@ -1783,6 +1804,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
goto err_batch_unpin;
}
 
+   if (in_fence) {
+   ret = i915_gem_request_await_fence(params->request, in_fence);
+   if (ret < 0)
+   goto err_request;
+   }
+
+   if (out_fence_fd != -1) {
+   out_fence = 
sync_file_create(fence_get(&params->request->fence));
+   if (!out_fence) {
+   ret = -ENOMEM;
+   goto err_request;
+   }
+   }
+
/* Whilst this request exists, batch_obj will be on the

[Intel-gfx] [PATCH 30/41] drm/i915: Wait first for submission, before waiting for request completion

2016-10-20 Thread Chris Wilson
In future patches, we will no longer be able to wait on a static global
seqno and instead have to break our wait up into phases. First we wait
for the global seqno assignment (upon submission to hardware), and once
submitted we wait for the hardware to complete.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem_request.c | 51 +
 1 file changed, 51 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 445efa5a7b6b..1aba0b57da3d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -789,6 +789,49 @@ bool __i915_spin_request(const struct drm_i915_gem_request 
*req,
return false;
 }
 
+static long
+__i915_request_wait_for_submit(struct drm_i915_gem_request *request,
+  unsigned int flags,
+  long timeout)
+{
+   const int state = flags & I915_WAIT_INTERRUPTIBLE ?
+   TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+   wait_queue_head_t *q = &request->i915->gpu_error.wait_queue;
+   DEFINE_WAIT(reset);
+   DEFINE_WAIT(wait);
+
+   if (flags & I915_WAIT_LOCKED)
+   add_wait_queue(q, &reset);
+
+   do {
+   prepare_to_wait(&request->submit.wait, &wait, state);
+
+   if (i915_sw_fence_done(&request->submit))
+   break;
+
+   if (flags & I915_WAIT_LOCKED &&
+   i915_reset_in_progress(&request->i915->gpu_error)) {
+   __set_current_state(TASK_RUNNING);
+   i915_reset(request->i915);
+   reset_wait_queue(q, &reset);
+   continue;
+   }
+
+   if (signal_pending_state(state, current)) {
+   timeout = -ERESTARTSYS;
+   break;
+   }
+
+   timeout = io_schedule_timeout(timeout);
+   } while (timeout);
+   finish_wait(&request->submit.wait, &wait);
+
+   if (flags & I915_WAIT_LOCKED)
+   remove_wait_queue(q, &reset);
+
+   return timeout;
+}
+
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: the request to wait upon
@@ -832,6 +875,14 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
trace_i915_gem_request_wait_begin(req);
 
+   if (!i915_sw_fence_done(&req->submit)) {
+   timeout = __i915_request_wait_for_submit(req, flags, timeout);
+   if (timeout < 0)
+   goto complete;
+
+   GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
+   }
+
/* Optimistic short spin before touching IRQs */
if (i915_spin_request(req, state, 5))
goto complete;
-- 
2.9.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 31/41] drm/i915: Introduce a global_seqno for each request

2016-10-20 Thread Chris Wilson
Though we will have multiple timelines, we still have a single timeline
of execution. This we can use to provide an execution and retirement order
of requests. This keeps tracking execution of requests simple, and is vital
for preserving a single waiter (i.e. so that we can order the waiters so
that only the earliest to wake up need be woken). To accomplish this we
distinguish the seqno used to order requests per-context (external) and
that used internally for execution.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  2 +-
 drivers/gpu/drm/i915/i915_drv.h|  4 ++--
 drivers/gpu/drm/i915/i915_gem.c|  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c| 19 +-
 drivers/gpu/drm/i915/i915_gem_request.h| 32 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |  2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  4 ++--
 drivers/gpu/drm/i915/i915_trace.h  |  8 
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |  8 +---
 drivers/gpu/drm/i915/intel_lrc.c   |  4 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c| 14 ++---
 11 files changed, 66 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 6d74a02a0677..6aaa4ae9d0a8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -637,7 +637,7 @@ static void print_request(struct seq_file *m,
rcu_read_lock();
task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
seq_printf(m, "%s%x [%x:%x] @ %d: %s [%d]\n", prefix,
-  rq->fence.seqno, rq->ctx->hw_id, rq->fence.seqno,
+  rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno,
   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
   task ? task->comm : "",
   task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 16e1f6c35ef6..9625b446c069 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4001,7 +4001,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request 
*req)
/* Before we do the heavier coherent read of the seqno,
 * check the value (hopefully) in the CPU cacheline.
 */
-   if (i915_gem_request_completed(req))
+   if (__i915_gem_request_completed(req))
return true;
 
/* Ensure our read of the seqno is coherent so that we
@@ -4052,7 +4052,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request 
*req)
wake_up_process(tsk);
rcu_read_unlock();
 
-   if (i915_gem_request_completed(req))
+   if (__i915_gem_request_completed(req))
return true;
}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 748020ad1033..5053fda016d8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2618,7 +2618,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs 
*engine)
return;
 
DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
-engine->name, request->fence.seqno);
+engine->name, request->global_seqno);
 
/* Setup the CS to resume from the breadcrumb of the hung request */
engine->reset_hw(engine, request);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 1aba0b57da3d..5c01b9548e72 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -376,7 +376,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 * of being read by __i915_gem_active_get_rcu(). As such,
 * we have to be very careful when overwriting the contents. During
 * the RCU lookup, we change chase the request->engine pointer,
-* read the request->fence.seqno and increment the reference count.
+* read the request->global_seqno and increment the reference count.
 *
 * The reference count is incremented atomically. If it is zero,
 * the lookup knows the request is unallocated and complete. Otherwise,
@@ -418,6 +418,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
INIT_LIST_HEAD(>active_list);
req->i915 = dev_priv;
req->engine = engine;
+   req->global_seqno = seqno;
req->ctx = i915_gem_context_get(ctx);
 
/* No zalloc, must clear what we need by hand */
@@ -475,8 +476,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request 
*to,
return ret < 0 ? ret : 0;
}
 
+   if (!from->global_seqno) {
+   ret = i915_sw_fence_await_dma_fence(>submit,
+  

[Intel-gfx] [PATCH 18/41] drm/i915: Pass around sg_table to get_pages/put_pages backend

2016-10-20 Thread Chris Wilson
The plan is to move obj->pages out from under the struct_mutex into its
own per-object lock. We need to prune any assumption of the struct_mutex
from the get_pages/put_pages backends, and to make it easier we pass
around the sg_table to operate on rather than indirectly via the obj.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_drv.h  |  36 +--
 drivers/gpu/drm/i915/i915_gem.c  | 172 +++
 drivers/gpu/drm/i915/i915_gem_dmabuf.c   |  20 ++--
 drivers/gpu/drm/i915/i915_gem_fence.c|  17 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c  |  19 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.h  |   6 +-
 drivers/gpu/drm/i915/i915_gem_internal.c |  23 ++---
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  11 +-
 drivers/gpu/drm/i915/i915_gem_stolen.c   |  43 
 drivers/gpu/drm/i915/i915_gem_userptr.c  |  88 
 10 files changed, 227 insertions(+), 208 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f2eda2bf0228..081aca2c76c0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2172,8 +2172,8 @@ struct drm_i915_gem_object_ops {
 * being released or under memory pressure (where we attempt to
 * reap pages for the shrinker).
 */
-   int (*get_pages)(struct drm_i915_gem_object *);
-   void (*put_pages)(struct drm_i915_gem_object *);
+   struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
+   void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
 
int (*dmabuf_export)(struct drm_i915_gem_object *);
void (*release)(struct drm_i915_gem_object *);
@@ -2308,8 +2308,6 @@ struct drm_i915_gem_object {
struct i915_gem_userptr {
uintptr_t ptr;
unsigned read_only :1;
-   unsigned workers :4;
-#define I915_GEM_USERPTR_MAX_WORKERS 15
 
struct i915_mm_struct *mm;
struct i915_mmu_object *mmu_object;
@@ -2371,6 +2369,19 @@ __deprecated
 extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
 
 static inline bool
+i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
+{
+   return atomic_read(>base.refcount.refcount) == 0;
+}
+
+#if IS_ENABLED(CONFIG_LOCKDEP)
+#define lockdep_assert_held_unless(lock, cond) \
+   GEM_BUG_ON(debug_locks && !lockdep_is_held(lock) && !(cond))
+#else
+#define lockdep_assert_held_unless(lock, cond)
+#endif
+
+static inline bool
 i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 {
return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
@@ -3198,6 +3209,8 @@ dma_addr_t
 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
unsigned long n);
 
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+struct sg_table *pages);
 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __must_check
@@ -3214,7 +3227,8 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 static inline void
 __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-   lockdep_assert_held(>base.dev->struct_mutex);
+   lockdep_assert_held_unless(>base.dev->struct_mutex,
+  i915_gem_object_is_dead(obj));
GEM_BUG_ON(!obj->mm.pages);
 
obj->mm.pages_pin_count++;
@@ -3229,7 +3243,8 @@ i915_gem_object_has_pinned_pages(struct 
drm_i915_gem_object *obj)
 static inline void
 __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
-   lockdep_assert_held(>base.dev->struct_mutex);
+   lockdep_assert_held_unless(>base.dev->struct_mutex,
+  i915_gem_object_is_dead(obj));
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
GEM_BUG_ON(!obj->mm.pages);
 
@@ -3242,7 +3257,8 @@ static inline void i915_gem_object_unpin_pages(struct 
drm_i915_gem_object *obj)
__i915_gem_object_unpin_pages(obj);
 }
 
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
+void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
 
 enum i915_map_type {
I915_MAP_WB = 0,
@@ -3467,8 +3483,10 @@ i915_vma_unpin_fence(struct i915_vma *vma)
 void i915_gem_restore_fences(struct drm_device *dev);
 
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
-void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj);
-void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
+void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+  struct sg_table *pages);
+void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+

[Intel-gfx] [PATCH 36/41] drm/i915: Create a unique name for the context

2016-10-20 Thread Chris Wilson
This will be used for communicating issues with this context to
userspace, so we want to identify the parent process and the individual
context. Note that the name isn't quite unique: it presumes that there
is only a single device fd per process.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 11 ++-
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem_context.c | 23 ++-
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 3d1c86b949ac..a11c3e84d997 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -631,17 +631,10 @@ static void print_request(struct seq_file *m,
  struct drm_i915_gem_request *rq,
  const char *prefix)
 {
-   struct pid *pid = rq->ctx->pid;
-   struct task_struct *task;
-
-   rcu_read_lock();
-   task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
-   seq_printf(m, "%s%x [%x:%x] @ %d: %s [%d]\n", prefix,
+   seq_printf(m, "%s%x [%x:%x] @ %d: %s\n", prefix,
   rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno,
   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
-  task ? task->comm : "",
-  task ? task->pid : -1);
-   rcu_read_unlock();
+  rq->timeline->common->name);
 }
 
 static int i915_gem_request_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 25c550893e3d..93061993b0fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -934,6 +934,7 @@ struct i915_gem_context {
struct drm_i915_file_private *file_priv;
struct i915_hw_ppgtt *ppgtt;
struct pid *pid;
+   const char *name;
 
struct i915_ctx_hang_stats hang_stats;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index a2acb8bb5f34..d3118db244c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -158,6 +158,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
__i915_gem_object_release_unless_active(ce->state->obj);
}
 
+   kfree(ctx->name);
put_pid(ctx->pid);
list_del(>link);
 
@@ -303,19 +304,28 @@ __create_hw_context(struct drm_device *dev,
}
 
/* Default context will never have a file_priv */
-   if (file_priv != NULL) {
+   ret = DEFAULT_CONTEXT_HANDLE;
+   if (file_priv) {
ret = idr_alloc(_priv->context_idr, ctx,
DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
if (ret < 0)
goto err_out;
-   } else
-   ret = DEFAULT_CONTEXT_HANDLE;
+   }
+   ctx->user_handle = ret;
 
ctx->file_priv = file_priv;
-   if (file_priv)
+   if (file_priv) {
ctx->pid = get_task_pid(current, PIDTYPE_PID);
+   ctx->name = kasprintf(GFP_KERNEL, "%s[%d]/%x",
+ current->comm,
+ pid_nr(ctx->pid),
+ ctx->user_handle);
+   if (!ctx->name) {
+   ret = -ENOMEM;
+   goto err_pid;
+   }
+   }
 
-   ctx->user_handle = ret;
/* NB: Mark all slices as needing a remap so that when the context first
 * loads it will restore whatever remap state already exists. If there
 * is no remap info, it will be a NOP. */
@@ -329,6 +339,9 @@ __create_hw_context(struct drm_device *dev,
 
return ctx;
 
+err_pid:
+   put_pid(ctx->pid);
+   idr_remove(_priv->context_idr, ctx->user_handle);
 err_out:
context_close(ctx);
return ERR_PTR(ret);
-- 
2.9.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 12/41] drm/i915: Introduce an internal allocator for disposable private objects

2016-10-20 Thread Chris Wilson
Quite a few of our objects used for internal hardware programming do not
benefit from being swappable or from being zero initialised. As such
they do not benefit from using a shmemfs backing storage and since they
are internal and never directly exposed to the user, we do not need to
worry about providing a filp. For these we can use a
drm_i915_gem_object wrapper around a sg_table of plain struct page. They
are not swap backed and not automatically pinned. If they are reaped
by the shrinker, the pages are released and the contents discarded. For
the internal use case, this is fine as for example, ringbuffers are
pinned from being written by a request to be read by the hardware. Once
they are idle, they can be discarded entirely. As such they are a good
match for execlist ringbuffers and a small variety of other internal
objects.

In the first iteration, this is limited to the scratch batch buffers we
use (for command parsing and state initialisation).

v2: Allocate physically contiguous pages, where possible.
v3: Reduce maximum order on subsequent requests following an allocation
failure.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/Makefile|   1 +
 drivers/gpu/drm/i915/i915_drv.h  |   5 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  27 ++---
 drivers/gpu/drm/i915/i915_gem_internal.c | 167 +++
 drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c   |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  14 ++-
 7 files changed, 194 insertions(+), 24 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 612340097f4b..7faa04c91e1a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
  i915_gem_execbuffer.o \
  i915_gem_fence.o \
  i915_gem_gtt.o \
+ i915_gem_internal.o \
  i915_gem.o \
  i915_gem_render_state.o \
  i915_gem_request.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e93c3797d90..e267e20bdcdb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3543,6 +3543,11 @@ i915_gem_object_create_stolen_for_preallocated(struct 
drm_device *dev,
   u32 gtt_offset,
   u32 size);
 
+/* i915_gem_internal.c */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
+   unsigned int size);
+
 /* i915_gem_shrinker.c */
 unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
  unsigned long target,
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index cb25cad3318c..aa4e1e043b4e 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
size_t size)
 {
struct drm_i915_gem_object *obj = NULL;
-   struct drm_i915_gem_object *tmp, *next;
+   struct drm_i915_gem_object *tmp;
struct list_head *list;
-   int n;
+   int n, ret;
 
lockdep_assert_held(>engine->i915->drm.struct_mutex);
 
@@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
n = ARRAY_SIZE(pool->cache_list) - 1;
list = >cache_list[n];
 
-   list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
+   list_for_each_entry(tmp, list, batch_pool_link) {
/* The batches are strictly LRU ordered */
if (!i915_gem_active_is_idle(>last_read[pool->engine->id],
 >base.dev->struct_mutex))
break;
 
-   /* While we're looping, do some clean up */
-   if (tmp->madv == __I915_MADV_PURGED) {
-   list_del(>batch_pool_link);
-   i915_gem_object_put(tmp);
-   continue;
-   }
-
if (tmp->base.size >= size) {
obj = tmp;
break;
@@ -132,19 +125,15 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
}
 
if (obj == NULL) {
-   int ret;
-
-   obj = i915_gem_object_create(>engine->i915->drm, size);
+   obj = i915_gem_object_create_internal(pool->engine->i915, size);
if (IS_ERR(obj))
return obj;
-
-   ret = i915_gem_object_get_pages(obj);
-   if (ret)
-   return ERR_PTR(ret);
-
-   obj->madv = 

[Intel-gfx] [PATCH 28/41] drm/i915: Combine seqno + tracking into a global timeline struct

2016-10-20 Thread Chris Wilson
Our timelines are more than just a seqno. They also provide an ordered
list of requests to be executed. Due to the restriction of handling
individual address spaces, we are limited to a timeline per address
space but we use a fence context per engine within.

Our first step to introducing independent timelines per context (i.e. to
allow each context to have a queue of requests to execute that have a
defined set of dependencies on other requests) is to provide a timeline
abstraction for the global execution queue.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/Makefile  |  1 +
 drivers/gpu/drm/i915/i915_debugfs.c| 33 +---
 drivers/gpu/drm/i915/i915_drv.c|  6 ++-
 drivers/gpu/drm/i915/i915_drv.h|  9 ++--
 drivers/gpu/drm/i915/i915_gem.c| 72 --
 drivers/gpu/drm/i915/i915_gem.h|  2 +
 drivers/gpu/drm/i915/i915_gem_request.c| 81 ++
 drivers/gpu/drm/i915/i915_gem_request.h|  1 +
 drivers/gpu/drm/i915/i915_gem_timeline.c   | 64 +++
 drivers/gpu/drm/i915/i915_gem_timeline.h   | 70 ++
 drivers/gpu/drm/i915/i915_gpu_error.c  |  6 +--
 drivers/gpu/drm/i915/i915_guc_submission.c |  3 +-
 drivers/gpu/drm/i915/i915_irq.c|  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c | 15 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.h| 36 ++---
 15 files changed, 286 insertions(+), 115 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 7faa04c91e1a..240ce9a8d68e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,6 +42,7 @@ i915-y += i915_cmd_parser.o \
  i915_gem_shrinker.o \
  i915_gem_stolen.o \
  i915_gem_tiling.o \
+ i915_gem_timeline.o \
  i915_gem_userptr.o \
  i915_trace_points.o \
  intel_breadcrumbs.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index b67db57bc135..6d74a02a0677 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -552,7 +552,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
seq_printf(m, "Flip queued on %s at seqno %x, 
next seqno %x [current breadcrumb %x], completed? %d\n",
   engine->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
-  dev_priv->next_seqno,
+  
dev_priv->gt.global_timeline.next_seqno,
   intel_engine_get_seqno(engine),
   
i915_gem_request_completed(work->flip_queued_req));
} else
@@ -662,13 +662,13 @@ static int i915_gem_request_info(struct seq_file *m, void 
*data)
int count;
 
count = 0;
-   list_for_each_entry(req, >request_list, link)
+   list_for_each_entry(req, >timeline->requests, link)
count++;
if (count == 0)
continue;
 
seq_printf(m, "%s requests: %d\n", engine->name, count);
-   list_for_each_entry(req, >request_list, link)
+   list_for_each_entry(req, >timeline->requests, link)
print_request(m, req, "");
 
any++;
@@ -1052,15 +1052,8 @@ static int
 i915_next_seqno_get(void *data, u64 *val)
 {
struct drm_i915_private *dev_priv = data;
-   int ret;
-
-   ret = mutex_lock_interruptible(_priv->drm.struct_mutex);
-   if (ret)
-   return ret;
-
-   *val = dev_priv->next_seqno;
-   mutex_unlock(_priv->drm.struct_mutex);
 
+   *val = READ_ONCE(dev_priv->gt.global_timeline.next_seqno);
return 0;
 }
 
@@ -1075,7 +1068,7 @@ i915_next_seqno_set(void *data, u64 val)
if (ret)
return ret;
 
-   ret = i915_gem_set_seqno(dev, val);
+   ret = i915_gem_set_global_seqno(dev, val);
mutex_unlock(>struct_mutex);
 
return ret;
@@ -1364,7 +1357,7 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
   engine->hangcheck.seqno,
   seqno[id],
-  engine->last_submitted_seqno);
+  engine->timeline->last_submitted_seqno);
seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
   yesno(intel_engine_has_waiter(engine)),
 

[Intel-gfx] [PATCH 24/41] drm/i915: Move object release to a freelist + worker

2016-10-20 Thread Chris Wilson
We want to hide the latency of releasing objects and their backing
storage from the submission, so we move the actual free to a worker.
This allows us to switch to struct_mutex freeing of the object in the
next patch.

Furthermore, if we know that the object we are dereferencing remains valid
for the duration of our access, we can forgo the usual synchronisation
barriers and atomic reference counting. To ensure this we defer freeing
an object until after an RCU grace period, such that any lookup of the
object within an RCU read critical section will remain valid until
after we exit that critical section. We also employ this delay for
rate-limiting the serialisation on reallocation - we have to slow down
object creation in order to prevent resource starvation (in particular,
files).

v2: Return early in i915_gem_tiling() ioctl to skip over superfluous
work on error.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  15 ++-
 drivers/gpu/drm/i915/i915_drv.c  |  19 ++--
 drivers/gpu/drm/i915/i915_drv.h  |  44 +++-
 drivers/gpu/drm/i915/i915_gem.c  | 166 +--
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  14 ++-
 drivers/gpu/drm/i915/i915_gem_tiling.c   |  21 ++--
 6 files changed, 202 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 4893a3b62a68..2780b07e04c1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4889,10 +4889,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
 #define DROP_BOUND 0x2
 #define DROP_RETIRE 0x4
 #define DROP_ACTIVE 0x8
-#define DROP_ALL (DROP_UNBOUND | \
- DROP_BOUND | \
- DROP_RETIRE | \
- DROP_ACTIVE)
+#define DROP_FREED 0x10
+#define DROP_ALL (DROP_UNBOUND | \
+ DROP_BOUND| \
+ DROP_RETIRE   | \
+ DROP_ACTIVE   | \
+ DROP_FREED)
 static int
 i915_drop_caches_get(void *data, u64 *val)
 {
@@ -4936,6 +4938,11 @@ i915_drop_caches_set(void *data, u64 val)
 unlock:
mutex_unlock(>struct_mutex);
 
+   if (val & DROP_FREED) {
+   synchronize_rcu();
+   flush_work(_priv->mm.free_work);
+   }
+
return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 99e4e044e958..dd9cea8ec25c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -537,14 +537,17 @@ static const struct vga_switcheroo_client_ops 
i915_switcheroo_ops = {
.can_switch = i915_switcheroo_can_switch,
 };
 
-static void i915_gem_fini(struct drm_device *dev)
+static void i915_gem_fini(struct drm_i915_private *dev_priv)
 {
-   mutex_lock(>struct_mutex);
-   i915_gem_cleanup_engines(dev);
-   i915_gem_context_fini(dev);
-   mutex_unlock(>struct_mutex);
+   mutex_lock(_priv->drm.struct_mutex);
+   i915_gem_cleanup_engines(_priv->drm);
+   i915_gem_context_fini(_priv->drm);
+   mutex_unlock(_priv->drm.struct_mutex);
+
+   synchronize_rcu();
+   flush_work(_priv->mm.free_work);
 
-   WARN_ON(!list_empty(_i915(dev)->context_list));
+   WARN_ON(!list_empty(_priv->context_list));
 }
 
 static int i915_load_modeset_init(struct drm_device *dev)
@@ -619,7 +622,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 cleanup_gem:
if (i915_gem_suspend(dev))
DRM_ERROR("failed to idle hardware; continuing to unload!\n");
-   i915_gem_fini(dev);
+   i915_gem_fini(dev_priv);
 cleanup_irq:
intel_guc_fini(dev);
drm_irq_uninstall(dev);
@@ -1303,7 +1306,7 @@ void i915_driver_unload(struct drm_device *dev)
drain_workqueue(dev_priv->wq);
 
intel_guc_fini(dev);
-   i915_gem_fini(dev);
+   i915_gem_fini(dev_priv);
intel_fbc_cleanup_cfb(dev_priv);
 
intel_power_domains_fini(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4876141d539b..446fa428c764 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1356,8 +1356,8 @@ struct i915_gem_mm {
struct list_head bound_list;
/**
 * List of objects which are not bound to the GTT (thus
-* are idle and not used by the GPU) but still have
-* (presumably uncached) pages still attached.
+* are idle and not used by the GPU). These objects may or may
+* not actually have any pages attached.
 */
struct list_head unbound_list;
 
@@ -1366,6 +1366,12 @@ struct i915_gem_mm {
 */
struct list_head userfault_list;
 
+   /**
+* List of objects which are pending destruction.
+*/
+   struct llist_head free_list;
+   struct work_struct free_work;
+
/** Usable 

[Intel-gfx] [PATCH 19/41] drm/i915: Move object backing storage manipulation to its own locking

2016-10-20 Thread Chris Wilson
Break the allocation of the backing storage away from struct_mutex into
a per-object lock. This allows parallel page allocation, provided we can
do so outside of struct_mutex (i.e. set-domain-ioctl, pwrite, GTT
fault), i.e. before execbuf! The increased cost of the atomic counters
is hidden behind i915_vma_pin() for the typical case of execbuf, i.e.
as the object is typically bound between execbufs, the page_pin_count is
static. The cost will be felt around set-domain and pwrite, but offset
by the improvement from reduced struct_mutex contention.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_drv.h  | 36 +
 drivers/gpu/drm/i915/i915_gem.c  | 93 +---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 51 --
 drivers/gpu/drm/i915/i915_gem_tiling.c   |  2 +
 drivers/gpu/drm/i915/i915_gem_userptr.c  | 10 ++--
 5 files changed, 114 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 081aca2c76c0..4876141d539b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2261,7 +2261,8 @@ struct drm_i915_gem_object {
unsigned int pin_display;
 
struct {
-   unsigned int pages_pin_count;
+   struct mutex lock; /* protects the pages and their use */
+   atomic_t pages_pin_count;
 
struct sg_table *pages;
void *mapping;
@@ -2374,13 +2375,6 @@ i915_gem_object_is_dead(const struct drm_i915_gem_object 
*obj)
return atomic_read(>base.refcount.refcount) == 0;
 }
 
-#if IS_ENABLED(CONFIG_LOCKDEP)
-#define lockdep_assert_held_unless(lock, cond) \
-   GEM_BUG_ON(debug_locks && !lockdep_is_held(lock) && !(cond))
-#else
-#define lockdep_assert_held_unless(lock, cond)
-#endif
-
 static inline bool
 i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 {
@@ -3216,9 +3210,9 @@ int __i915_gem_object_get_pages(struct 
drm_i915_gem_object *obj);
 static inline int __must_check
 i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-   lockdep_assert_held(>base.dev->struct_mutex);
+   might_lock(>mm.lock);
 
-   if (obj->mm.pages_pin_count++)
+   if (atomic_inc_not_zero(>mm.pages_pin_count))
return 0;
 
return __i915_gem_object_get_pages(obj);
@@ -3227,32 +3221,29 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object 
*obj)
 static inline void
 __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-   lockdep_assert_held_unless(>base.dev->struct_mutex,
-  i915_gem_object_is_dead(obj));
GEM_BUG_ON(!obj->mm.pages);
 
-   obj->mm.pages_pin_count++;
+   atomic_inc(>mm.pages_pin_count);
 }
 
 static inline bool
 i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
 {
-   return obj->mm.pages_pin_count;
+   return atomic_read(>mm.pages_pin_count);
 }
 
 static inline void
 __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
-   lockdep_assert_held_unless(>base.dev->struct_mutex,
-  i915_gem_object_is_dead(obj));
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
GEM_BUG_ON(!obj->mm.pages);
 
-   obj->mm.pages_pin_count--;
-   GEM_BUG_ON(obj->mm.pages_pin_count < obj->bind_count);
+   atomic_dec(>mm.pages_pin_count);
+   GEM_BUG_ON(atomic_read(>mm.pages_pin_count) < obj->bind_count);
 }
 
-static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
__i915_gem_object_unpin_pages(obj);
 }
@@ -3275,8 +3266,8 @@ enum i915_map_type {
  * the kernel address space. Based on the @type of mapping, the PTE will be
  * set to either WriteBack or WriteCombine (via pgprot_t).
  *
- * The caller must hold the struct_mutex, and is responsible for calling
- * i915_gem_object_unpin_map() when the mapping is no longer required.
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
  *
  * Returns the pointer through which to access the mapped object, or an
  * ERR_PTR() on error.
@@ -3292,12 +3283,9 @@ void *__must_check i915_gem_object_pin_map(struct 
drm_i915_gem_object *obj,
  * with your access, call i915_gem_object_unpin_map() to release the pin
  * upon the mapping. Once the pin count reaches zero, that mapping may be
  * removed.
- *
- * The caller must hold the struct_mutex.
  */
 static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 {
-   lockdep_assert_held(>base.dev->struct_mutex);
i915_gem_object_unpin_pages(obj);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index da299e3e48d1..bfa80302e2d5 100644
--- 

[Intel-gfx] [PATCH 20/41] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex

2016-10-20 Thread Chris Wilson
Use the per-object mm.lock to allocate the backing storage (and hold a
reference to it across the dmabuf access) without resorting to
struct_mutex.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 69 +++---
 1 file changed, 30 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 2abd524aba14..4d45f20d11ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -44,19 +44,15 @@ static struct sg_table *i915_gem_map_dma_buf(struct 
dma_buf_attachment *attachme
struct scatterlist *src, *dst;
int ret, i;
 
-   ret = i915_mutex_lock_interruptible(obj->base.dev);
-   if (ret)
-   goto err;
-
ret = i915_gem_object_pin_pages(obj);
if (ret)
-   goto err_unlock;
+   goto err;
 
/* Copy sg so that we make an independent mapping */
st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
if (st == NULL) {
ret = -ENOMEM;
-   goto err_unpin;
+   goto err_unpin_pages;
}
 
ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
@@ -72,21 +68,18 @@ static struct sg_table *i915_gem_map_dma_buf(struct 
dma_buf_attachment *attachme
}
 
if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
-   ret =-ENOMEM;
+   ret = -ENOMEM;
goto err_free_sg;
}
 
-   mutex_unlock(>base.dev->struct_mutex);
return st;
 
 err_free_sg:
sg_free_table(st);
 err_free:
kfree(st);
-err_unpin:
+err_unpin_pages:
i915_gem_object_unpin_pages(obj);
-err_unlock:
-   mutex_unlock(>base.dev->struct_mutex);
 err:
return ERR_PTR(ret);
 }
@@ -101,36 +94,21 @@ static void i915_gem_unmap_dma_buf(struct 
dma_buf_attachment *attachment,
sg_free_table(sg);
kfree(sg);
 
-   mutex_lock(>base.dev->struct_mutex);
i915_gem_object_unpin_pages(obj);
-   mutex_unlock(>base.dev->struct_mutex);
 }
 
 static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-   struct drm_device *dev = obj->base.dev;
-   void *addr;
-   int ret;
-
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ERR_PTR(ret);
-
-   addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
-   mutex_unlock(>struct_mutex);
 
-   return addr;
+   return i915_gem_object_pin_map(obj, I915_MAP_WB);
 }
 
 static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-   struct drm_device *dev = obj->base.dev;
 
-   mutex_lock(>struct_mutex);
i915_gem_object_unpin_map(obj);
-   mutex_unlock(>struct_mutex);
 }
 
 static void *i915_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned 
long page_num)
@@ -177,32 +155,45 @@ static int i915_gem_begin_cpu_access(struct dma_buf 
*dma_buf, enum dma_data_dire
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
struct drm_device *dev = obj->base.dev;
-   int ret;
bool write = (direction == DMA_BIDIRECTIONAL || direction == 
DMA_TO_DEVICE);
+   int err;
 
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
+   err = i915_gem_object_pin_pages(obj);
+   if (err)
+   return err;
+
+   err = i915_mutex_lock_interruptible(dev);
+   if (err)
+   goto out;
 
-   ret = i915_gem_object_set_to_cpu_domain(obj, write);
+   err = i915_gem_object_set_to_cpu_domain(obj, write);
mutex_unlock(>struct_mutex);
-   return ret;
+
+out:
+   i915_gem_object_unpin_pages(obj);
+   return err;
 }
 
 static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum 
dma_data_direction direction)
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
struct drm_device *dev = obj->base.dev;
-   int ret;
+   int err;
 
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
+   err = i915_gem_object_pin_pages(obj);
+   if (err)
+   return err;
+
+   err = i915_mutex_lock_interruptible(dev);
+   if (err)
+   goto out;
 
-   ret = i915_gem_object_set_to_gtt_domain(obj, false);
+   err = i915_gem_object_set_to_gtt_domain(obj, false);
mutex_unlock(>struct_mutex);
 
-   return ret;
+out:
+   i915_gem_object_unpin_pages(obj);
+   return err;
 }
 
 static const struct dma_buf_ops i915_dmabuf_ops =  {
-- 
2.9.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org

[Intel-gfx] [PATCH 34/41] drm/i915: Defer breadcrumb emission

2016-10-20 Thread Chris Wilson
Move the actual emission of the breadcrumb for closing the request from
i915_add_request() to the submit callback. (It can be moved later when
required.) This allows us to defer the allocation of the global_seqno
from request construction to actual submission, allowing us to emit the
requests out of order (wrt the order of their construction, they
still will only be executed once all of their dependencies are resolved
including that all earlier requests on their timeline have been
submitted.) We have to specialise how we then emit the request in order
to write into the preallocated space, rather than at the tail of the
ringbuffer (which will have been advanced by the addition of new
requests).

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem_request.c |  41 +++-
 drivers/gpu/drm/i915/intel_lrc.c| 120 ---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 169 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  10 +-
 4 files changed, 118 insertions(+), 222 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 8a5ae624b125..30292ed2bab2 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -318,17 +318,16 @@ submit_notify(struct i915_sw_fence *fence, enum 
i915_sw_fence_notify state)
container_of(fence, typeof(*request), submit);
struct intel_engine_cs *engine = request->engine;
 
+   if (state != FENCE_COMPLETE)
+   return NOTIFY_DONE;
+
/* Will be called from irq-context when using foreign DMA fences */
 
-   switch (state) {
-   case FENCE_COMPLETE:
-   engine->timeline->last_submitted_seqno = request->fence.seqno;
-   engine->submit_request(request);
-   break;
+   engine->timeline->last_submitted_seqno = request->fence.seqno;
 
-   case FENCE_FREE:
-   break;
-   }
+   engine->emit_breadcrumb(request,
+   request->ring->vaddr + request->postfix);
+   engine->submit_request(request);
 
return NOTIFY_DONE;
 }
@@ -648,9 +647,7 @@ void __i915_add_request(struct drm_i915_gem_request 
*request, bool flush_caches)
struct intel_ring *ring = request->ring;
struct intel_timeline *timeline = request->timeline;
struct drm_i915_gem_request *prev;
-   u32 request_start;
-   u32 reserved_tail;
-   int ret;
+   int err;
 
lockdep_assert_held(&request->i915->drm.struct_mutex);
trace_i915_gem_request_add(request);
@@ -660,8 +657,6 @@ void __i915_add_request(struct drm_i915_gem_request 
*request, bool flush_caches)
 * should already have been reserved in the ring buffer. Let the ring
 * know that it is time to use that space up.
 */
-   request_start = ring->tail;
-   reserved_tail = request->reserved_space;
request->reserved_space = 0;
 
/*
@@ -672,10 +667,10 @@ void __i915_add_request(struct drm_i915_gem_request 
*request, bool flush_caches)
 * what.
 */
if (flush_caches) {
-   ret = engine->emit_flush(request, EMIT_FLUSH);
+   err = engine->emit_flush(request, EMIT_FLUSH);
 
/* Not allowed to fail! */
-   WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
+   WARN(err, "engine->emit_flush() failed: %d!\n", err);
}
 
/* Record the position of the start of the breadcrumb so that
@@ -683,20 +678,10 @@ void __i915_add_request(struct drm_i915_gem_request 
*request, bool flush_caches)
 * GPU processing the request, we never over-estimate the
 * position of the ring's HEAD.
 */
+   err = intel_ring_begin(request, engine->emit_breadcrumb_sz);
+   GEM_BUG_ON(err);
request->postfix = ring->tail;
-
-   /* Not allowed to fail! */
-   ret = engine->emit_breadcrumb(request);
-   WARN(ret, "(%s)->emit_breadcrumb failed: %d!\n", engine->name, ret);
-
-   /* Sanity check that the reserved size was large enough. */
-   ret = ring->tail - request_start;
-   if (ret < 0)
-   ret += ring->size;
-   WARN_ONCE(ret > reserved_tail,
- "Not enough space reserved (%d bytes) "
- "for adding the request (%d bytes)\n",
- reserved_tail, ret);
+   ring->tail += engine->emit_breadcrumb_sz * sizeof(u32);
 
/* Seal the request and mark it as pending execution. Note that
 * we may inspect this state, without holding any locks, during
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8229baebb2b3..fa3012c342cc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -365,7 +365,7 @@ static u64 execlists_update_context(struct 

[Intel-gfx] [PATCH 29/41] drm/i915: Queue the idling context switch after all other timelines

2016-10-20 Thread Chris Wilson
Before suspend, we wait for the switch to the kernel context. In order
for all the other context images to be complete upon suspend, that
switch must be the last operation by the GPU (i.e. this idling request
must not overtake any pending requests). To make this request execute last,
we make it depend on every other inflight request.

Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_gem.c | 10 ++
 drivers/gpu/drm/i915/i915_gem_context.c | 23 +--
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 42344b1e4e0d..748020ad1033 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4360,6 +4360,15 @@ void __i915_gem_object_release_unless_active(struct 
drm_i915_gem_object *obj)
i915_gem_object_put(obj);
 }
 
+static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
+{
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+
+   for_each_engine(engine, dev_priv, id)
+   GEM_BUG_ON(engine->last_context != dev_priv->kernel_context);
+}
+
 int i915_gem_suspend(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
@@ -4389,6 +4398,7 @@ int i915_gem_suspend(struct drm_device *dev)
 
i915_gem_retire_requests(dev_priv);
 
+   assert_kernel_context_is_current(dev_priv);
i915_gem_context_lost(dev_priv);
mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 47e888cc721f..a2acb8bb5f34 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -931,22 +931,33 @@ int i915_switch_context(struct drm_i915_gem_request *req)
 int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 {
struct intel_engine_cs *engine;
+   struct i915_gem_timeline *timeline;
enum intel_engine_id id;
 
+   lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
for_each_engine(engine, dev_priv, id) {
struct drm_i915_gem_request *req;
int ret;
 
-   if (engine->last_context == NULL)
-   continue;
-
-   if (engine->last_context == dev_priv->kernel_context)
-   continue;
-
req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
if (IS_ERR(req))
return PTR_ERR(req);
 
+   /* Queue this switch after all other activity */
+   list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
+   struct drm_i915_gem_request *prev;
+   struct intel_timeline *tl;
+
+   tl = &timeline->engine[engine->id];
+   prev = i915_gem_active_raw(&tl->last_request,
+  &dev_priv->drm.struct_mutex);
+   if (prev)
+   i915_sw_fence_await_sw_fence_gfp(&req->submit,
+&prev->submit,
+GFP_KERNEL);
+   }
+
ret = i915_switch_context(req);
i915_add_request_no_flush(req);
if (ret)
-- 
2.9.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


  1   2   3   >