Re: [Intel-gfx] [PATCH v2] drm/i915: Remove memory frequency calculation

2021-10-13 Thread Zhao, Yakui




On 2021/10/13 10:54, Matt Roper wrote:

On Tue, Oct 12, 2021 at 06:00:46PM -0700, José Roberto de Souza wrote:

The calculated memory frequency is only used to check whether it is zero,
which is not useful as it will never actually be zero.

Also the calculation is wrong: we should be checking other bits to
select the appropriate frequency multiplier, while this code is stuck
with a fixed multiplier.

So drop it as a whole.

v2:
- Also remove memory frequency calculation for gen9 LP platforms

Cc: Yakui Zhao 
Cc: Matt Roper 
Fixes: f8112cb9574b ("drm/i915/gen11+: Only load DRAM information from pcode")
Signed-off-by: José Roberto de Souza 


Reviewed-by: Matt Roper 


After removing the check of memory frequency, the EHL SBL can work as 
expected. Otherwise it will fail some checks in intel_dram_detect 
because of incorrect memory frequency calculation.


Add: Tested-by: Zhao Yakui 



---
  drivers/gpu/drm/i915/i915_reg.h   |  8 
  drivers/gpu/drm/i915/intel_dram.c | 30 ++
  2 files changed, 2 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a897f4abea0c3..8825f7ac477b6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -11109,12 +11109,6 @@ enum skl_power_gate {
  #define  DC_STATE_DEBUG_MASK_CORES(1 << 0)
  #define  DC_STATE_DEBUG_MASK_MEMORY_UP(1 << 1)
  
-#define BXT_P_CR_MC_BIOS_REQ_0_0_0	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7114)

-#define  BXT_REQ_DATA_MASK 0x3F
-#define  BXT_DRAM_CHANNEL_ACTIVE_SHIFT 12
-#define  BXT_DRAM_CHANNEL_ACTIVE_MASK  (0xF << 12)
-#define  BXT_MEMORY_FREQ_MULTIPLIER_HZ 1
-
  #define BXT_D_CR_DRP0_DUNIT8  0x1000
  #define BXT_D_CR_DRP0_DUNIT9  0x1200
  #define  BXT_D_CR_DRP0_DUNIT_START8
@@ -11145,9 +11139,7 @@ enum skl_power_gate {
  #define  BXT_DRAM_TYPE_LPDDR4 (0x2 << 22)
  #define  BXT_DRAM_TYPE_DDR4   (0x4 << 22)
  
-#define SKL_MEMORY_FREQ_MULTIPLIER_HZ		2

  #define SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU _MMIO(MCHBAR_MIRROR_BASE_SNB + 
0x5E04)
-#define  SKL_REQ_DATA_MASK (0xF << 0)
  #define  DG1_GEAR_TYPEREG_BIT(16)
  
  #define SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5000)

diff --git a/drivers/gpu/drm/i915/intel_dram.c 
b/drivers/gpu/drm/i915/intel_dram.c
index 30a0cab5eff46..0adadfd9528aa 100644
--- a/drivers/gpu/drm/i915/intel_dram.c
+++ b/drivers/gpu/drm/i915/intel_dram.c
@@ -244,7 +244,6 @@ static int
  skl_get_dram_info(struct drm_i915_private *i915)
  {
struct dram_info *dram_info = >dram_info;
-   u32 mem_freq_khz, val;
int ret;
  
  	dram_info->type = skl_get_dram_type(i915);

@@ -255,17 +254,6 @@ skl_get_dram_info(struct drm_i915_private *i915)
if (ret)
return ret;
  
-	val = intel_uncore_read(>uncore,

-   SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
-   mem_freq_khz = DIV_ROUND_UP((val & SKL_REQ_DATA_MASK) *
-   SKL_MEMORY_FREQ_MULTIPLIER_HZ, 1000);
-
-   if (dram_info->num_channels * mem_freq_khz == 0) {
-   drm_info(>drm,
-"Couldn't get system memory bandwidth\n");
-   return -EINVAL;
-   }
-
return 0;
  }
  
@@ -350,24 +338,10 @@ static void bxt_get_dimm_info(struct dram_dimm_info *dimm, u32 val)

  static int bxt_get_dram_info(struct drm_i915_private *i915)
  {
struct dram_info *dram_info = >dram_info;
-   u32 dram_channels;
-   u32 mem_freq_khz, val;
-   u8 num_active_channels, valid_ranks = 0;
+   u32 val;
+   u8 valid_ranks = 0;
int i;
  
-	val = intel_uncore_read(>uncore, BXT_P_CR_MC_BIOS_REQ_0_0_0);

-   mem_freq_khz = DIV_ROUND_UP((val & BXT_REQ_DATA_MASK) *
-   BXT_MEMORY_FREQ_MULTIPLIER_HZ, 1000);
-
-   dram_channels = val & BXT_DRAM_CHANNEL_ACTIVE_MASK;
-   num_active_channels = hweight32(dram_channels);
-
-   if (mem_freq_khz * num_active_channels == 0) {
-   drm_info(>drm,
-"Couldn't get system memory bandwidth\n");
-   return -EINVAL;
-   }
-
/*
 * Now read each DUNIT8/9/10/11 to check the rank of each dimms.
 */
--
2.33.0





Re: [Intel-gfx] [v1 10/10] drm/i915/gvt: GVTg support ppgtt pvmmio optimization

2018-10-11 Thread Zhao, Yakui



On 2018年10月11日 14:14, Xiaolin Zhang wrote:

This patch handles ppgtt update from g2v notification.

It reads out the ppgtt pte entries from the guest pte table pages and
converts them to host pfns.

It creates local ppgtt tables and inserts the content pages
into the local ppgtt tables directly, which does not track
the usage of the guest page table and removes the cost of write
protection from the original shadow page mechanism.


It is possible that the guest VGPU writes ppgtt entries using the 2M/64K
page mode.


If so, GVT-g should also handle that case in PVMMIO mode.



v1: rebase
v0: RFC

Signed-off-by: Xiaolin Zhang 
---
  drivers/gpu/drm/i915/gvt/gtt.c  | 318 
  drivers/gpu/drm/i915/gvt/gtt.h  |   9 +
  drivers/gpu/drm/i915/gvt/handlers.c |  13 +-
  3 files changed, 338 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 58e166e..8d3e21a 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1744,6 +1744,26 @@ static int ppgtt_handle_guest_write_page_table_bytes(
return 0;
  }
  
+static void invalidate_mm_pv(struct intel_vgpu_mm *mm)

+{
+   struct intel_vgpu *vgpu = mm->vgpu;
+   struct intel_gvt *gvt = vgpu->gvt;
+   struct intel_gvt_gtt *gtt = >gtt;
+   struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
+   struct intel_gvt_gtt_entry se;
+
+   i915_ppgtt_close(>ppgtt->vm);
+   i915_ppgtt_put(mm->ppgtt);
+
+   ppgtt_get_shadow_root_entry(mm, , 0);
+   if (!ops->test_present())
+   return;
+   se.val64 = 0;
+   ppgtt_set_shadow_root_entry(mm, , 0);
+
+   mm->ppgtt_mm.shadowed  = false;
+}
+
  static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
  {
struct intel_vgpu *vgpu = mm->vgpu;
@@ -1756,6 +1776,11 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
if (!mm->ppgtt_mm.shadowed)
return;
  
+	if (VGPU_PVMMIO(mm->vgpu) & PVMMIO_PPGTT_UPDATE) {

+   invalidate_mm_pv(mm);
+   return;
+   }
+
for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
ppgtt_get_shadow_root_entry(mm, , index);
  
@@ -1773,6 +1798,26 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)

mm->ppgtt_mm.shadowed = false;
  }
  
+static int shadow_mm_pv(struct intel_vgpu_mm *mm)

+{
+   struct intel_vgpu *vgpu = mm->vgpu;
+   struct intel_gvt *gvt = vgpu->gvt;
+   struct intel_gvt_gtt_entry se;
+
+   mm->ppgtt = i915_ppgtt_create(gvt->dev_priv, NULL);
+   if (IS_ERR(mm->ppgtt)) {
+   gvt_vgpu_err("fail to create ppgtt for pdp 0x%llx\n",
+   px_dma(>ppgtt->pml4));
+   return PTR_ERR(mm->ppgtt);
+   }
+
+   se.type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
+   se.val64 = px_dma(>ppgtt->pml4);
+   ppgtt_set_shadow_root_entry(mm, , 0);
+   mm->ppgtt_mm.shadowed  = true;
+
+   return 0;
+}
  
  static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)

  {
@@ -1787,6 +1832,9 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
if (mm->ppgtt_mm.shadowed)
return 0;
  
+	if (VGPU_PVMMIO(mm->vgpu) & PVMMIO_PPGTT_UPDATE)

+   return shadow_mm_pv(mm);
+
mm->ppgtt_mm.shadowed = true;
  
  	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {

@@ -2767,3 +2815,273 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
intel_vgpu_destroy_all_ppgtt_mm(vgpu);
intel_vgpu_reset_ggtt(vgpu, true);
  }
+
+int intel_vgpu_g2v_pv_ppgtt_alloc_4lvl(struct intel_vgpu *vgpu,
+   u64 pdps[])
+{
+   struct intel_vgpu_mm *mm;
+   int ret = 0;
+   u32 offset;
+   struct pv_ppgtt_update pv_ppgtt;
+
+   offset = offsetof(struct gvt_shared_page, pv_ppgtt);
+   intel_gvt_read_shared_page(vgpu, offset, _ppgtt, sizeof(pv_ppgtt));
+
+   mm = intel_vgpu_find_ppgtt_mm(vgpu, _ppgtt.pdp);
+   if (!mm) {
+   gvt_vgpu_err("failed to find pdp 0x%llx\n", pv_ppgtt.pdp);
+   ret = -EINVAL;
+   } else {
+   ret = mm->ppgtt->vm.allocate_va_range(>ppgtt->vm,
+   pv_ppgtt.start, pv_ppgtt.length);
+   if (ret)
+   gvt_vgpu_err("failed to alloc %llx\n", pv_ppgtt.pdp);
+   }
+
+   return ret;
+}
+
+int intel_vgpu_g2v_pv_ppgtt_clear_4lvl(struct intel_vgpu *vgpu,
+   u64 pdps[])
+{
+   struct intel_vgpu_mm *mm;
+   int ret = 0;
+   u32 offset;
+   struct pv_ppgtt_update pv_ppgtt;
+
+   offset = offsetof(struct gvt_shared_page, pv_ppgtt);
+   intel_gvt_read_shared_page(vgpu, offset, _ppgtt, sizeof(pv_ppgtt));
+   mm = intel_vgpu_find_ppgtt_mm(vgpu, _ppgtt.pdp);
+   if (!mm) {
+   gvt_vgpu_err("failed to find pdp 0x%llx\n", pv_ppgtt.pdp);
+   ret = -EINVAL;
+   } else {
+   

Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU

2018-07-03 Thread Zhao, Yakui
>-Original Message-
>From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
>Sent: Tuesday, July 3, 2018 10:08 PM
>To: Zhao, Yakui ; Daniel Vetter 
>Cc: intel-gfx@lists.freedesktop.org
>Subject: RE: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on
>VGPU
>
>Quoting Zhao, Yakui (2018-07-03 14:58:31)
>> >-Original Message-
>> >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
>> >Sent: Tuesday, July 3, 2018 9:25 PM
>> >To: Zhao, Yakui ; Daniel Vetter
>> >
>> >Cc: intel-gfx@lists.freedesktop.org
>> >Subject: RE: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg
>> >only once on VGPU
>> >
>> >Quoting Zhao, Yakui (2018-07-03 13:47:46)
>> >>
>> >> >-Original Message-
>> >> >From: Daniel Vetter [mailto:daniel.vet...@ffwll.ch] On Behalf Of
>> >> >Daniel Vetter
>> >> >Sent: Tuesday, July 3, 2018 5:52 PM
>> >> >To: Chris Wilson 
>> >> >Cc: Daniel Vetter ; Zhao, Yakui
>> >> >; intel-gfx@lists.freedesktop.org
>> >> >Subject: Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg
>> >> >only once on VGPU
>> >> >
>> >> >On Tue, Jul 03, 2018 at 10:05:28AM +0100, Chris Wilson wrote:
>> >> >> Quoting Daniel Vetter (2018-07-03 09:51:03)
>> >> >> > On Tue, Jul 03, 2018 at 10:56:17AM +0800, Zhao Yakui wrote:
>> >> >> > > On VGPU scenario the read/write operation of fence_reg will
>> >> >> > > be trapped by the GVT-g. And then gvt-g follows the HW spec
>> >> >> > > to write the
>> >> >fence_reg.
>> >> >> > > So it is unnecessary to read/write fence reg several times.
>> >> >> > > This will help to reduce the unnecessary trap of fence_reg
>> >> >> > > mmio
>> >operation.
>> >> >> > >
>> >> >> > > V1->V2: Fix one typo error of parameter when calling
>> >> >> > > intel_vgpu_active
>> >> >> > >
>> >> >> > > Signed-off-by: Zhao Yakui 
>> >> >> >
>> >> >> > Ok this makes more sense. Except you need to put the 64bit
>> >> >> > entirely into the vpgu block, with a comment explaining why
>> >> >> > this is safe (since the vpgu will take care of updating fences 
>> >> >> > correctly).
>> >> >>
>> >> >> Except, who cares? Are fence registers being rewritten that
>> >> >> frequently that special casing vgpu is worth the hassle. Part of
>> >> >> that is that you need to leave a hint behind in the code that
>> >> >> (a) explains why it is safe after having the "here be dragons"
>> >> >> and (b) why we
>> >care.
>> >> >>
>> >> >> On a more pragmatic level if fencing doesn't plateau out to
>> >> >> steady state, that is a worrying amount of contention -- the
>> >> >> actual fence write itself would be the least of my worries.
>> >> >
>> >> >I can easily imagine that with the few per-client fences vgpu
>> >> >hands out rewrites are much more common. But yeah some real data
>> >> >would be
>> >good.
>> >> >And more reasons to get mesa off of the gtt mmaps.
>> >>
>> >> Hi, Daniel/Chris
>> >>
>> >>   Thanks for your comments.
>> >>   The fence reg is used to assure the access of Tiled surface
>> >> through aperature window. When fence is needed, the driver helps to
>> >> find one available fence reg and then configure it. After it is not
>> >> used, the
>> >fence will be turned off and then be allocated for next usage. It
>> >doesn't rely on the state of fence reg.  In such case we don't need
>> >to worry about the unsteady state.
>> >>
>> >>   For the VGPU operation: The op of fence reg is trapped.  Then
>> >> the gvt-g
>> >will follow the trapped value to program the fence_reg.
>> >> (It will turn off and then write the expected value for any trapped
>> >> write op
>> >of fence reg). The trapped op in GVT-g is safe.
>> >>
>> >>   Based on the current logic,  it needs the five traps when one
>> >> fence reg is
>> >configured under VGPU mode.(

Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU

2018-07-03 Thread Zhao, Yakui
>-Original Message-
>From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
>Sent: Tuesday, July 3, 2018 9:25 PM
>To: Zhao, Yakui ; Daniel Vetter 
>Cc: intel-gfx@lists.freedesktop.org
>Subject: RE: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on
>VGPU
>
>Quoting Zhao, Yakui (2018-07-03 13:47:46)
>>
>> >-Original Message-
>> >From: Daniel Vetter [mailto:daniel.vet...@ffwll.ch] On Behalf Of
>> >Daniel Vetter
>> >Sent: Tuesday, July 3, 2018 5:52 PM
>> >To: Chris Wilson 
>> >Cc: Daniel Vetter ; Zhao, Yakui
>> >; intel-gfx@lists.freedesktop.org
>> >Subject: Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg
>> >only once on VGPU
>> >
>> >On Tue, Jul 03, 2018 at 10:05:28AM +0100, Chris Wilson wrote:
>> >> Quoting Daniel Vetter (2018-07-03 09:51:03)
>> >> > On Tue, Jul 03, 2018 at 10:56:17AM +0800, Zhao Yakui wrote:
>> >> > > On VGPU scenario the read/write operation of fence_reg will be
>> >> > > trapped by the GVT-g. And then gvt-g follows the HW spec to
>> >> > > write the
>> >fence_reg.
>> >> > > So it is unnecessary to read/write fence reg several times.
>> >> > > This will help to reduce the unnecessary trap of fence_reg mmio
>operation.
>> >> > >
>> >> > > V1->V2: Fix one typo error of parameter when calling
>> >> > > intel_vgpu_active
>> >> > >
>> >> > > Signed-off-by: Zhao Yakui 
>> >> >
>> >> > Ok this makes more sense. Except you need to put the 64bit
>> >> > entirely into the vpgu block, with a comment explaining why this
>> >> > is safe (since the vpgu will take care of updating fences correctly).
>> >>
>> >> Except, who cares? Are fence registers being rewritten that
>> >> frequently that special casing vgpu is worth the hassle. Part of
>> >> that is that you need to leave a hint behind in the code that (a)
>> >> explains why it is safe after having the "here be dragons" and (b) why we
>care.
>> >>
>> >> On a more pragmatic level if fencing doesn't plateau out to steady
>> >> state, that is a worrying amount of contention -- the actual fence
>> >> write itself would be the least of my worries.
>> >
>> >I can easily imagine that with the few per-client fences vgpu hands
>> >out rewrites are much more common. But yeah some real data would be
>good.
>> >And more reasons to get mesa off of the gtt mmaps.
>>
>> Hi, Daniel/Chris
>>
>>   Thanks for your comments.
>>   The fence reg is used to assure the access of Tiled surface
>> through aperature window. When fence is needed, the driver helps to
>> find one available fence reg and then configure it. After it is not used, the
>fence will be turned off and then be allocated for next usage. It doesn't rely 
>on
>the state of fence reg.  In such case we don't need to worry about the
>unsteady state.
>>
>>   For the VGPU operation: The op of fence reg is trapped.  Then the gvt-g
>will follow the trapped value to program the fence_reg.
>> (It will turn off and then write the expected value for any trapped write op
>of fence reg). The trapped op in GVT-g is safe.
>>
>>   Based on the current logic,  it needs the five traps when one fence 
>> reg is
>configured under VGPU mode.(Three writes, two reads).
>> If it is programmed in one 64-bit op under VGPU mode, only one trap is
>needed. And the GVT-g still can configure the expected fence_value.
>> As the trap is quite heavy for VGPU, the trap time can be saved.
>
>But the argument is can we avoid it entirely by never changing the fence. You
>say this is used for mapping through the aperture (GTT), we say userspace
>shouldn't be doing that for performance reasons :) A slow trap on top of a
>slow operation that is already causing contention seems more sensible to fix
>at source. (Albeit so long as the maintenance burden is considered and found
>to be reasonable, adding special cases with their rationale is acceptable.) So
>you have to sell why this mmio is worthy of special attention and curtail any
>future questions.

If the userspace driver/app takes care of buffer allocation, especially
for tiled
surfaces, it may reduce how often the fence is changed. But this can't be
avoided if a tiled
buffer is needed and allocated. It also depends on the userspace driver, and
that is beyond the
responsibility of the kernel driver.

I

[Intel-gfx] [PATCH v3] drm/i915: Use 64-bit write to optimize writing fence_reg on VGPU

2018-07-03 Thread Zhao Yakui
On VGPU scenario the read/write operation of fence_reg will be trapped
by the GVT-g. Then gvt-g follows the HW spec to program the fence_reg.
And the gvt-g takes care of updating the fence reg correctly for any
trapped value of fence reg.

So it is unnecessary to read/write the fence reg several times. It is enough
to write the fence reg value only once in 64-bit mode. This will help
to reduce the redundant traps of fence_reg mmio operations.

V1->V2: Fix one typo error of parameter when calling intel_vgpu_active.
V2->V3: Follow Chris Wilson and Daniel Vetter to add more descriptions.

Signed-off-by: Zhao Yakui 
---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c 
b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d548ac0..7b10bf9 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -63,6 +63,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg 
*fence,
i915_reg_t fence_reg_lo, fence_reg_hi;
int fence_pitch_shift;
u64 val;
+   struct drm_i915_private *dev_priv = fence->i915;
 
if (INTEL_GEN(fence->i915) >= 6) {
fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
@@ -92,9 +93,17 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg 
*fence,
val |= I965_FENCE_REG_VALID;
}
 
-   if (!pipelined) {
-   struct drm_i915_private *dev_priv = fence->i915;
-
+   if (intel_vgpu_active(dev_priv)) {
+   /* Use the 64-bit RW to write fence reg on VGPU mode.
+* The GVT-g can trap the written val of VGPU to program the
+* fence reg. And the fence write in gvt-g follows the
+* sequence of off/read/double-write/read. This assures that
+* the fence reg is configured correctly.
+* At the same time the 64-bit op can help to reduce the num
+* of VGPU trap for the fence reg.
+*/
+   I915_WRITE64_FW(fence_reg_lo, val);
+   } else {
/* To w/a incoherency with non-atomic 64-bit register updates,
 * we split the 64-bit update into two 32-bit writes. In order
 * for a partial fence not to be evaluated between writes, we
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU

2018-07-03 Thread Zhao, Yakui

>-Original Message-
>From: Daniel Vetter [mailto:daniel.vet...@ffwll.ch] On Behalf Of Daniel Vetter
>Sent: Tuesday, July 3, 2018 5:52 PM
>To: Chris Wilson 
>Cc: Daniel Vetter ; Zhao, Yakui ;
>intel-gfx@lists.freedesktop.org
>Subject: Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on
>VGPU
>
>On Tue, Jul 03, 2018 at 10:05:28AM +0100, Chris Wilson wrote:
>> Quoting Daniel Vetter (2018-07-03 09:51:03)
>> > On Tue, Jul 03, 2018 at 10:56:17AM +0800, Zhao Yakui wrote:
>> > > On VGPU scenario the read/write operation of fence_reg will be
>> > > trapped by the GVT-g. And then gvt-g follows the HW spec to write the
>fence_reg.
>> > > So it is unnecessary to read/write fence reg several times. This
>> > > will help to reduce the unnecessary trap of fence_reg mmio operation.
>> > >
>> > > V1->V2: Fix one typo error of parameter when calling
>> > > intel_vgpu_active
>> > >
>> > > Signed-off-by: Zhao Yakui 
>> >
>> > Ok this makes more sense. Except you need to put the 64bit entirely
>> > into the vgpu block, with a comment explaining why this is safe
>> > (since the vgpu will take care of updating fences correctly).
>>
>> Except, who cares? Are fence registers being rewritten that frequently
>> that special casing vgpu is worth the hassle. Part of that is that you
>> need to leave a hint behind in the code that (a) explains why it is
>> safe after having the "here be dragons" and (b) why we care.
>>
>> On a more pragmatic level if fencing doesn't plateau out to steady
>> state, that is a worrying amount of contention -- the actual fence
>> write itself would be the least of my worries.
>
>I can easily imagine that with the few per-client fences vgpu hands out
>rewrites are much more common. But yeah some real data would be good.
>And more reasons to get mesa off of the gtt mmaps.

Hi, Daniel/Chris

  Thanks for your comments.
  The fence reg is used to ensure correct access to tiled surfaces through
the aperture window. When a fence is needed, the driver
helps to find one available fence reg and then configures it. After it is no longer
used, the fence will be turned off and then be allocated
for the next usage. It doesn't rely on the state of the fence reg.  In such a case we
don't need to worry about the unsteady state.

  For the VGPU operation: the op on the fence reg is trapped.  Then gvt-g
will follow the trapped value to program the fence_reg.
(It will turn off and then write the expected value for any trapped write op on the
fence reg.) The trapped op in GVT-g is safe.

  Based on the current logic, five traps are needed when one fence reg
is configured under VGPU mode (three writes, two reads).
If it is programmed in one 64-bit op under VGPU mode, only one trap is needed,
and GVT-g can still configure the expected fence value.
As a trap is quite heavy for a VGPU, the trap time can be saved.

  I will put some description in the code and the commit log in the next version.
   
>-Daniel
>--
>Daniel Vetter
>Software Engineer, Intel Corporation
>http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU

2018-07-03 Thread Zhao, Yakui


>-Original Message-
>From: Daniel Vetter [mailto:daniel.vet...@ffwll.ch] On Behalf Of Daniel Vetter
>Sent: Tuesday, July 3, 2018 4:51 PM
>To: Zhao, Yakui 
>Cc: intel-gfx@lists.freedesktop.org
>Subject: Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on
>VGPU
>
>On Tue, Jul 03, 2018 at 10:56:17AM +0800, Zhao Yakui wrote:
>> On VGPU scenario the read/write operation of fence_reg will be trapped
>> by the GVT-g. And then gvt-g follows the HW spec to write the fence_reg.
>> So it is unnecessary to read/write fence reg several times. This will
>> help to reduce the unnecessary trap of fence_reg mmio operation.
>>
>> V1->V2: Fix one typo error of parameter when calling intel_vgpu_active
>>
>> Signed-off-by: Zhao Yakui 
>
>Ok this makes more sense. Except you need to put the 64bit entirely into the
>vgpu block, with a comment explaining why this is safe (since the vgpu will
>take care of updating fences correctly).

Thanks for your comment and reply.
GVT-g will take care of how the fence reg is updated.

I will refine the comments and commit log.

>-Daniel
>
>> ---
>>  drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +-
>>  1 file changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>> b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>> index d92fe03..9c97976 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>> @@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct
>> drm_i915_fence_reg *fence,
>>
>>  if (INTEL_GEN(fence->i915) >= 6) {
>>  /* Use the 64-bit RW to read/write fence reg on SNB+ */
>> -I915_WRITE64_FW(fence_reg_lo, 0);
>> -I915_READ64(fence_reg_lo);
>> -
>> -I915_WRITE64_FW(fence_reg_lo, val);
>> -I915_READ64(fence_reg_lo);
>> +if (intel_vgpu_active(dev_priv))
>> +I915_WRITE64_FW(fence_reg_lo, val);
>> +else {
>> +I915_WRITE64_FW(fence_reg_lo, 0);
>> +I915_READ64(fence_reg_lo);
>> +
>> +I915_WRITE64_FW(fence_reg_lo, val);
>> +I915_READ64(fence_reg_lo);
>> +}
>>  } else {
>>  /* To w/a incoherency with non-atomic 64-bit register updates,
>>   * we split the 64-bit update into two 32-bit writes. In order
>> --
>> 2.7.4
>>
>> ___
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
>--
>Daniel Vetter
>Software Engineer, Intel Corporation
>http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 1/2] drm/i915: Use 64-bit to Read/Write fence reg on SNB+

2018-07-03 Thread Zhao, Yakui


>-Original Message-
>From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
>Sent: Tuesday, July 3, 2018 5:01 PM
>To: Daniel Vetter ; Zhao, Yakui 
>Cc: intel-gfx@lists.freedesktop.org
>Subject: Re: [Intel-gfx] [PATCH v2 1/2] drm/i915: Use 64-bit to Read/Write
>fence reg on SNB+
>
>Quoting Daniel Vetter (2018-07-03 09:49:29)
>> On Tue, Jul 03, 2018 at 10:56:16AM +0800, Zhao Yakui wrote:
>> > Based on HW spec the fence reg on SNB+ is defined as 64-bit. Just
>> > follow the b-spec to  use 64-bit read/write mode.
>> >
>> > Signed-off-by: Zhao Yakui 
>>
>> Please use git blame to understand why you've just re-introduced a bug
>> that took months to debug.
>
>And there's even a very nice comment explaining exactly what the HW does
>and why the double write is required.
>
>First rule of IT: turn it off and on again.

Hi, Chris/Daniel

   Thanks for the detailed explanation.  I checked the history of this issue.
 It was a commit from about five years ago.
Maybe the op of the fence reg on HW doesn't follow its description very strictly.
I am not sure whether this has changed on the latest HW.
OK. Please ignore this patch as the double write is safer.

>-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU

2018-07-02 Thread Zhao Yakui
On VGPU scenario the read/write operation of fence_reg will be trapped
by the GVT-g. And then gvt-g follows the HW spec to write the fence_reg.
So it is unnecessary to read/write fence reg several times. This will help
to reduce the unnecessary trap of fence_reg mmio operation.

V1->V2: Fix one typo error of parameter when calling intel_vgpu_active

Signed-off-by: Zhao Yakui 
---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c 
b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d92fe03..9c97976 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg 
*fence,
 
if (INTEL_GEN(fence->i915) >= 6) {
/* Use the 64-bit RW to read/write fence reg on SNB+ */
-   I915_WRITE64_FW(fence_reg_lo, 0);
-   I915_READ64(fence_reg_lo);
-
-   I915_WRITE64_FW(fence_reg_lo, val);
-   I915_READ64(fence_reg_lo);
+   if (intel_vgpu_active(dev_priv))
+   I915_WRITE64_FW(fence_reg_lo, val);
+   else {
+   I915_WRITE64_FW(fence_reg_lo, 0);
+   I915_READ64(fence_reg_lo);
+
+   I915_WRITE64_FW(fence_reg_lo, val);
+   I915_READ64(fence_reg_lo);
+   }
} else {
/* To w/a incoherency with non-atomic 64-bit register updates,
 * we split the 64-bit update into two 32-bit writes. In order
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 0/2] drm/i915: Optimize the read/write fence_reg on SNB+

2018-07-02 Thread Zhao Yakui
V1->V2: Fix one typo error.

Zhao Yakui (2):
  drm/i915: Use 64-bit to Read/Write fence reg on SNB+
  drm/i915: write fence reg only once on VGPU

 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 1/2] drm/i915: Use 64-bit to Read/Write fence reg on SNB+

2018-07-02 Thread Zhao Yakui
Based on the HW spec, the fence reg on SNB+ is defined as 64-bit. Just follow
the b-spec and use 64-bit read/write mode.

Signed-off-by: Zhao Yakui 
---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c 
b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d548ac0..d92fe03 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -63,6 +63,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg 
*fence,
i915_reg_t fence_reg_lo, fence_reg_hi;
int fence_pitch_shift;
u64 val;
+   struct drm_i915_private *dev_priv = fence->i915;
 
if (INTEL_GEN(fence->i915) >= 6) {
fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
@@ -92,9 +93,14 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg 
*fence,
val |= I965_FENCE_REG_VALID;
}
 
-   if (!pipelined) {
-   struct drm_i915_private *dev_priv = fence->i915;
+   if (INTEL_GEN(fence->i915) >= 6) {
+   /* Use the 64-bit RW to read/write fence reg on SNB+ */
+   I915_WRITE64_FW(fence_reg_lo, 0);
+   I915_READ64(fence_reg_lo);
 
+   I915_WRITE64_FW(fence_reg_lo, val);
+   I915_READ64(fence_reg_lo);
+   } else {
/* To w/a incoherency with non-atomic 64-bit register updates,
 * we split the 64-bit update into two 32-bit writes. In order
 * for a partial fence not to be evaluated between writes, we
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: write fence reg only once on VGPU

2018-07-02 Thread Zhao, Yakui


>-Original Message-
>From: Zhao, Yakui
>Sent: Tuesday, July 3, 2018 10:22 AM
>To: intel-gfx@lists.freedesktop.org
>Cc: zhen...@linux.intel.com; Zhao, Yakui 
>Subject: [PATCH 2/2] drm/i915: write fence reg only once on VGPU
>
>On VGPU scenario the read/write operation of fence_reg will be trapped by
>the GVT-g. And then gvt-g follows the HW spec to write the fence_reg.
>So it is unnecessary to read/write fence reg several times. This will help to
>reduce the unnecessary trap of fence_reg mmio operation.
>

Sorry, there is a typo in this patch. A V2 will be sent.

>Signed-off-by: Zhao Yakui 
>---
> drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +-
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>index d92fe03..55bf6d9 100644
>--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>@@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct
>drm_i915_fence_reg *fence,
>
>   if (INTEL_GEN(fence->i915) >= 6) {
>   /* Use the 64-bit RW to read/write fence reg on SNB+ */
>-  I915_WRITE64_FW(fence_reg_lo, 0);
>-  I915_READ64(fence_reg_lo);
>-
>-  I915_WRITE64_FW(fence_reg_lo, val);
>-  I915_READ64(fence_reg_lo);
>+  if (intel_vgpu_active(i915))
>+  I915_WRITE64_FW(fence_reg_lo, val);
>+  else {
>+  I915_WRITE64_FW(fence_reg_lo, 0);
>+  I915_READ64(fence_reg_lo);
>+
>+  I915_WRITE64_FW(fence_reg_lo, val);
>+  I915_READ64(fence_reg_lo);
>+  }
>   } else {
>   /* To w/a incoherency with non-atomic 64-bit register updates,
>* we split the 64-bit update into two 32-bit writes. In order
>--
>2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915: write fence reg only once on VGPU

2018-07-02 Thread Zhao Yakui
On VGPU scenario the read/write operation of fence_reg will be trapped
by the GVT-g. And then gvt-g follows the HW spec to write the fence_reg.
So it is unnecessary to read/write fence reg several times. This will help
to reduce the unnecessary trap of fence_reg mmio operation.

Signed-off-by: Zhao Yakui 
---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c 
b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d92fe03..55bf6d9 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg 
*fence,
 
if (INTEL_GEN(fence->i915) >= 6) {
/* Use the 64-bit RW to read/write fence reg on SNB+ */
-   I915_WRITE64_FW(fence_reg_lo, 0);
-   I915_READ64(fence_reg_lo);
-
-   I915_WRITE64_FW(fence_reg_lo, val);
-   I915_READ64(fence_reg_lo);
+   if (intel_vgpu_active(i915))
+   I915_WRITE64_FW(fence_reg_lo, val);
+   else {
+   I915_WRITE64_FW(fence_reg_lo, 0);
+   I915_READ64(fence_reg_lo);
+
+   I915_WRITE64_FW(fence_reg_lo, val);
+   I915_READ64(fence_reg_lo);
+   }
} else {
/* To w/a incoherency with non-atomic 64-bit register updates,
 * we split the 64-bit update into two 32-bit writes. In order
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 0/2] drm/i915: the Read/Write optimization of fence reg

2018-07-02 Thread Zhao Yakui

Zhao Yakui (2):
  drm/i915: Use 64-bit to Read/Write fence reg on SNB+
  drm/i915: write fence reg only once on VGPU

 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915: Use 64-bit to Read/Write fence reg on SNB+

2018-07-02 Thread Zhao Yakui
Based on HW spec the fence reg on SNB+ is defined as 64-bit. Just follow
the b-spec to use 64-bit read/write mode.

Signed-off-by: Zhao Yakui 
---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c 
b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d548ac0..d92fe03 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -63,6 +63,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg 
*fence,
i915_reg_t fence_reg_lo, fence_reg_hi;
int fence_pitch_shift;
u64 val;
+   struct drm_i915_private *dev_priv = fence->i915;
 
if (INTEL_GEN(fence->i915) >= 6) {
fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
@@ -92,9 +93,14 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg 
*fence,
val |= I965_FENCE_REG_VALID;
}
 
-   if (!pipelined) {
-   struct drm_i915_private *dev_priv = fence->i915;
+   if (INTEL_GEN(fence->i915) >= 6) {
+   /* Use the 64-bit RW to read/write fence reg on SNB+ */
+   I915_WRITE64_FW(fence_reg_lo, 0);
+   I915_READ64(fence_reg_lo);
 
+   I915_WRITE64_FW(fence_reg_lo, val);
+   I915_READ64(fence_reg_lo);
+   } else {
/* To w/a incoherency with non-atomic 64-bit register updates,
 * we split the 64-bit update into two 32-bit writes. In order
 * for a partial fence not to be evaluated between writes, we
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC

2018-06-22 Thread Zhao, Yakui


>-Original Message-
>From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
>Sent: Friday, June 22, 2018 3:37 PM
>To: Zhao, Yakui ; intel-gfx@lists.freedesktop.org
>Subject: RE: [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context
>buffer on the platforms without LLC
>
>Quoting Zhao, Yakui (2018-06-22 08:29:15)
>>
>>
>> >-Original Message-
>> >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
>> >Sent: Friday, June 22, 2018 2:36 PM
>> >To: Zhao, Yakui ;
>> >intel-gfx@lists.freedesktop.org
>> >Cc: Zhao, Yakui 
>> >Subject: Re: [PATCH V2] drm/i915: Use I915_MAP_WC for execlists
>> >context buffer on the platforms without LLC
>> >
>> >Quoting Zhao Yakui (2018-06-22 07:09:10)
>> >> @@ -2728,6 +2729,7 @@ populate_lr_context(struct i915_gem_context
>*ctx,
>> >> struct intel_engine_cs *engine,
>> >> struct intel_ring *ring)  {
>> >> +   enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB :
>> >> + I915_MAP_WC;
>> >> void *vaddr;
>> >> u32 *regs;
>> >> int ret;
>> >> @@ -2738,13 +2740,12 @@ populate_lr_context(struct
>i915_gem_context
>> >*ctx,
>> >> return ret;
>> >> }
>> >>
>> >> -   vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
>> >> +   vaddr = i915_gem_object_pin_map(ctx_obj, map);
>> >
>> >As this uses the cpu domain and flushed afterwards, this one is
>> >correct in its usage of MAP_WB.
>>
>> In this function the content of context state is flushed.
>>
>> But the function of execlists_submit_ports will update it again before 
>> writing
>the ELSP port.
>> And there is no flush. In fact after the ELSP port is written, the HW will 
>> start
>to execute the submitted commands.
>
>That's a different map.

Really?  It is allocated in one gem obj.

Will you please help to point out where to handle the different map?

Thanks
 Yakui

>-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC

2018-06-22 Thread Zhao, Yakui


>-Original Message-
>From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
>Sent: Friday, June 22, 2018 2:26 PM
>To: Zhao, Yakui ; intel-gfx@lists.freedesktop.org
>Cc: Zhao, Yakui 
>Subject: Re: [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context
>buffer on the platforms without LLC
>
>Quoting Zhao Yakui (2018-06-22 07:09:10)
>> Under execlists mode the context buffer is allocated in global Gtt region.
>> The I915_MAP_WB type is used to map the buffer so that the driver can
>> initialize the context buffer.(Ring reg, Context Ctrl reg and so on).
>> And then __context_pin is called to flush back corresponding contents.
>> In fact as it also tries to update context buffer (Ring Tail offset)
>> before writing the ELSP port, it has no explicit cache flush. Maybe it
>> is handled by HW. But this is quite confusing as BXT has no LLC. So
>> the WC is used to map the context buffer on the platform without LLC
>> and the update of context buffer is written into phys page directly. It
>> will be safer.
>>
>> V1->V2: Remove the dirty flag of execlists state buffer and one minor
>> typo in commit log
>
>The object's pages are still dirty, so why? It's not about CPU cache dirt, 
>here it
>is about whether the pages differ from any potential swapcache.
>

Based on the test it seems that this patch still has some problems. More work
is needed in order to change the MAP type.
Maybe this buffer should be handled like the intel_ring buffer.
I will check it later.

>I was anticipating there would be some type conflict with
>engine->pinned_default_state, but that just happens to work out
>correctly... so long as there is always a retirement during load and we park
>before any reset. Hmm.
>-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC

2018-06-22 Thread Zhao, Yakui


>-Original Message-
>From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
>Sent: Friday, June 22, 2018 2:36 PM
>To: Zhao, Yakui ; intel-gfx@lists.freedesktop.org
>Cc: Zhao, Yakui 
>Subject: Re: [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context
>buffer on the platforms without LLC
>
>Quoting Zhao Yakui (2018-06-22 07:09:10)
>> @@ -2728,6 +2729,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>> struct intel_engine_cs *engine,
>> struct intel_ring *ring)  {
>> +   enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB :
>> + I915_MAP_WC;
>> void *vaddr;
>> u32 *regs;
>> int ret;
>> @@ -2738,13 +2740,12 @@ populate_lr_context(struct i915_gem_context
>*ctx,
>> return ret;
>> }
>>
>> -   vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
>> +   vaddr = i915_gem_object_pin_map(ctx_obj, map);
>
>As this uses the cpu domain and flushed afterwards, this one is correct in its
>usage of MAP_WB.

In this function the content of context state is flushed.

But the function of execlists_submit_ports will update it again before writing 
the ELSP port.
And there is no flush. In fact after the ELSP port is written, the HW will 
start to execute the submitted commands.


>-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC

2018-06-22 Thread Zhao Yakui
Under execlists mode the context buffer is allocated in global Gtt region.
The I915_MAP_WB type is used to map the buffer so that the driver can
initialize the context buffer.(Ring reg, Context Ctrl reg and so on).
And then __context_pin is called to flush back corresponding contents.
In fact as it also tries to update context buffer (Ring Tail offset)
before writing the ELSP port, it has no explicit cache flush. Maybe it is
handled by HW. But this is quite confusing as BXT has no LLC. So the WC
is used to map the context buffer on the platform without LLC and the
update of context buffer is written into phys page directly. It will
be safer.

V1->V2: Remove the dirty flag of execlists state buffer and one minor
typo in commit log

Signed-off-by: Zhao Yakui 
CC: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_lrc.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 10deebe..5ffd76e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1386,6 +1386,7 @@ __execlists_context_pin(struct intel_engine_cs *engine,
 {
void *vaddr;
int ret;
+   enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB : I915_MAP_WC;
 
ret = execlists_context_deferred_alloc(ctx, engine, ce);
if (ret)
@@ -1396,7 +1397,7 @@ __execlists_context_pin(struct intel_engine_cs *engine,
if (ret)
goto err;
 
-   vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+   vaddr = i915_gem_object_pin_map(ce->state->obj, map);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
goto unpin_vma;
@@ -2728,6 +2729,7 @@ populate_lr_context(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
struct intel_ring *ring)
 {
+   enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB : I915_MAP_WC;
void *vaddr;
u32 *regs;
int ret;
@@ -2738,13 +2740,12 @@ populate_lr_context(struct i915_gem_context *ctx,
return ret;
}
 
-   vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
+   vaddr = i915_gem_object_pin_map(ctx_obj, map);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
return ret;
}
-   ctx_obj->mm.dirty = true;
 
if (engine->default_state) {
/*
@@ -2756,7 +2757,7 @@ populate_lr_context(struct i915_gem_context *ctx,
void *defaults;
 
defaults = i915_gem_object_pin_map(engine->default_state,
-  I915_MAP_WB);
+  map);
if (IS_ERR(defaults)) {
ret = PTR_ERR(defaults);
goto err_unpin_ctx;
@@ -2851,6 +2852,7 @@ void intel_lr_context_resume(struct drm_i915_private 
*dev_priv)
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
enum intel_engine_id id;
+   enum i915_map_type map = HAS_LLC(dev_priv) ? I915_MAP_WB : I915_MAP_WC;
 
/* Because we emit WA_TAIL_DWORDS there may be a disparity
 * between our bookkeeping in ce->ring->head and ce->ring->tail and
@@ -2872,7 +2874,7 @@ void intel_lr_context_resume(struct drm_i915_private 
*dev_priv)
continue;
 
reg = i915_gem_object_pin_map(ce->state->obj,
- I915_MAP_WB);
+ map);
if (WARN_ON(IS_ERR(reg)))
continue;
 
@@ -2880,7 +2882,6 @@ void intel_lr_context_resume(struct drm_i915_private 
*dev_priv)
reg[CTX_RING_HEAD+1] = 0;
reg[CTX_RING_TAIL+1] = 0;
 
-   ce->state->obj->mm.dirty = true;
i915_gem_object_unpin_map(ce->state->obj);
 
intel_ring_reset(ce->ring, 0);
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC

2018-06-21 Thread Zhao Yakui
Under execlists mode the context buffer is allocated in global Gtt region.
The I915_MAP_WB type is used to map the buffer so that the driver can
initialize the context buffer.(Ring reg, Context Ctrl reg and so on).
And then __context_pin is called to flush back corresponding contents.
In fact as it also tries to update context buffer (Ring Tail offset)
before writing the ELSP port, it has no explicit cache flush. Maybe it is
handled by HW. But this is quite confusing as BXT has no LLC. So the WC
is used to map the context buffer on the platform without LLC and the
update of context buffer is written into phys page directly. It will
be safer.

Signed-off-by: Zhao Yakui 
CC: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_lrc.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 10deebe..a76ea83 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1386,6 +1386,7 @@ __execlists_context_pin(struct intel_engine_cs *engine,
 {
void *vaddr;
int ret;
+   enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB : I915_MAP_WC;
 
ret = execlists_context_deferred_alloc(ctx, engine, ce);
if (ret)
@@ -1396,7 +1397,7 @@ __execlists_context_pin(struct intel_engine_cs *engine,
if (ret)
goto err;
 
-   vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+   vaddr = i915_gem_object_pin_map(ce->state->obj, map);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
goto unpin_vma;
@@ -2728,6 +2729,7 @@ populate_lr_context(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
struct intel_ring *ring)
 {
+   enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB : I915_MAP_WC;
void *vaddr;
u32 *regs;
int ret;
@@ -2738,7 +2740,7 @@ populate_lr_context(struct i915_gem_context *ctx,
return ret;
}
 
-   vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
+   vaddr = i915_gem_object_pin_map(ctx_obj, map);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
@@ -2756,7 +2758,7 @@ populate_lr_context(struct i915_gem_context *ctx,
void *defaults;
 
defaults = i915_gem_object_pin_map(engine->default_state,
-  I915_MAP_WB);
+  map);
if (IS_ERR(defaults)) {
ret = PTR_ERR(defaults);
goto err_unpin_ctx;
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Give proper names to MOCS entries

2016-07-13 Thread Zhao Yakui

On 07/13/2016 06:04 PM, Deak, Imre wrote:

Hi Yakui,

thanks for taking a look at these, see my comment below.

On ke, 2016-07-13 at 10:22 +0800, Zhao Yakui wrote:

On 07/01/2016 09:40 PM, Deak, Imre wrote:

The purpose for each MOCS entry isn't well defined atm. Defining these
is important to remove any uncertainty about the use of these entries
for example in terms of performance and GPU/CPU coherency.

Suggested by Ville.

CC: Rong R Yang<rong.r.y...@intel.com>
CC: Yakui Zhao<yakui.z...@intel.com>
CC: Ville Syrjälä<ville.syrj...@linux.intel.com>
CC: Chris Wilson<ch...@chris-wilson.co.uk>
Signed-off-by: Imre Deak<imre.d...@intel.com>


This looks readable and meaningful after giving proper names to MOCS
entry index.

But not sure whether the comment of I915_MOCS_CACHED has one typo?


---
   drivers/gpu/drm/i915/intel_mocs.c | 13 +++--
   include/uapi/drm/i915_drm.h   | 24 
   2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 927825f..86adc11 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
*   end.
*/
   static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-   { /* 0x0009 */
+   [I915_MOCS_UNCACHED] = {
+ /* 0x0009 */
  .control_value = LE_CACHEABILITY(LE_UC) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
@@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry 
skylake_mocs_table[] = {
  /* 0x0010 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
},
-   {
+   [I915_MOCS_AUTO] = {
  /* 0x0038 */
  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry 
skylake_mocs_table[] = {
  /* 0x0030 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
-   {
+   [I915_MOCS_CACHED] = {
  /* 0x003b */
  .control_value = LE_CACHEABILITY(LE_WB) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry 
skylake_mocs_table[] = {

   /* NOTE: the LE_TGT_CACHE is not used on Broxton */
   static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-   {
+   [I915_MOCS_UNCACHED] = {
  /* 0x0009 */
  .control_value = LE_CACHEABILITY(LE_UC) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
  /* 0x0010 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
},
-   {
+   [I915_MOCS_AUTO] = {
  /* 0x0038 */
  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
  /* 0x0030 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
-   {
+   [I915_MOCS_CACHED] = {
  /* 0x0039 */
  .control_value = LE_CACHEABILITY(LE_UC) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c17d63d..a5d116f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -62,6 +62,30 @@ extern "C" {
   #define I915_ERROR_UEVENT"ERROR"
   #define I915_RESET_UEVENT"RESET"

+/*
+ * MOCS indexes used for GPU surfaces, defining the cacheability of the
+ * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
+ */
+enum i915_mocs_table_index {
+   /*
+* Not cached anywhere, coherency between CPU and GPU accesses is
+* guaranteed.
+*/
+   I915_MOCS_UNCACHED,
+   /*
+* Cacheability and coherency controlled by the kernel automatically
+* based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
+* usage of the surface (used for display scanout or not).
+*/
+   I915_MOCS_AUTO,
+   /*
+* Cached in all GPU caches available on the platform.
+* Coherency between CPU and GPU accesses to the surface is not
+* guaranteed without extra synchronization.
+*/


IMO the coherency is guaranteed without extra synchronization for the
MOCS_CACHED.


No. On BXT it will make the data cached in GPU caches but will not keep
the data coherent between GPU and CPU without extra synchronization.
For that we would need to enable snooping, but that has considerable
overhead, so we turn that off in patch 2/3. On

Re: [Intel-gfx] [PATCH v3 2/3] drm/i915/bxt: Fix inadvertent CPU snooping due to incorrect MOCS config

2016-07-12 Thread Zhao Yakui

On 07/01/2016 09:40 PM, Deak, Imre wrote:

Setting a write-back cache policy in the MOCS entry definition also
implies snooping, which has a considerable overhead. This is
unexpected for a few reasons:
- From user-space's point of view since it didn't want a coherent
   surface (it didn't set the buffer as such via the set caching IOCTL).
- There is a separate MOCS entry field for snooping (which we never
   set).
- This MOCS table is about caching in (e)LLC and there is no (e)LLC on
   BXT. There is a separate table for L3 cache control.

Considering the above the current behavior of snooping looks like an
unintentional side-effect of the WB setting. Changing it to be LLC-UC
gets rid of the snooping without any ill-effects. For a coherent
surface the application would use a separate MOCS entry at index 1 and
call the set caching IOCTL to setup the PTE entries for the
corresponding buffer to be snooped. In the future we could also add a
new MOCS entry for coherent surfaces.

This resulted in 70% improvement in synthetic texturing benchmarks.

Kudos to Valtteri Rantala, Eero Tamminen and Michael T Frederick and
Ville who helped to narrow the source of problem to the kernel and to
the snooping behaviour in particular.

With a follow-up change to adjust the 3rd entry value
igt/gem_mocs_settings is passing after this change.

v2:
- Rebase on v2 of patch 1/2.
v3:
- Set the entry as LLC uncached instead of PTE-passthrough. This way
   we also keep snooping disabled, but we also make the cacheability/
   coherency setting independent of the PTE which is managed by the
   kernel. (Chris)

CC: Rong R Yang<rong.r.y...@intel.com>
CC: Yakui Zhao<yakui.z...@intel.com>
CC: Valtteri Rantala<valtteri.rant...@intel.com>
CC: Eero Tamminen<eero.t.tammi...@intel.com>
CC: Michael T Frederick<michael.t.freder...@intel.com>
CC: Ville Syrjälä<ville.syrj...@linux.intel.com>
CC: Chris Wilson<ch...@chris-wilson.co.uk>
Signed-off-by: Imre Deak<imre.d...@intel.com>


As the BXT has no LLC, setting the WB-policy will add the extra 
overhead. In such case the patch looks more reasonable for BXT.


Add: Acked-by: Zhao Yakui <yakui.z...@intel.com>


---
  drivers/gpu/drm/i915/intel_mocs.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index d36e609..927825f 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -149,8 +149,8 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
{
- /* 0x003b */
- .control_value = LE_CACHEABILITY(LE_WB) |
+ /* 0x0039 */
+ .control_value = LE_CACHEABILITY(LE_UC) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
   LE_PFM(0) | LE_SCF(0),


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Give proper names to MOCS entries

2016-07-12 Thread Zhao Yakui

On 07/01/2016 09:40 PM, Deak, Imre wrote:

The purpose for each MOCS entry isn't well defined atm. Defining these
is important to remove any uncertainty about the use of these entries
for example in terms of performance and GPU/CPU coherency.

Suggested by Ville.

CC: Rong R Yang
CC: Yakui Zhao
CC: Ville Syrjälä
CC: Chris Wilson
Signed-off-by: Imre Deak


This looks readable and meaningful after giving proper names to MOCS 
entry index.


But not sure whether the comment of I915_MOCS_CACHED has one typo?


---
  drivers/gpu/drm/i915/intel_mocs.c | 13 +++--
  include/uapi/drm/i915_drm.h   | 24 
  2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 927825f..86adc11 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
   *   end.
   */
  static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-   { /* 0x0009 */
+   [I915_MOCS_UNCACHED] = {
+ /* 0x0009 */
  .control_value = LE_CACHEABILITY(LE_UC) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
@@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry 
skylake_mocs_table[] = {
  /* 0x0010 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
},
-   {
+   [I915_MOCS_AUTO] = {
  /* 0x0038 */
  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry 
skylake_mocs_table[] = {
  /* 0x0030 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
-   {
+   [I915_MOCS_CACHED] = {
  /* 0x003b */
  .control_value = LE_CACHEABILITY(LE_WB) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry 
skylake_mocs_table[] = {

  /* NOTE: the LE_TGT_CACHE is not used on Broxton */
  static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-   {
+   [I915_MOCS_UNCACHED] = {
  /* 0x0009 */
  .control_value = LE_CACHEABILITY(LE_UC) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
  /* 0x0010 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
},
-   {
+   [I915_MOCS_AUTO] = {
  /* 0x0038 */
  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
  /* 0x0030 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
-   {
+   [I915_MOCS_CACHED] = {
  /* 0x0039 */
  .control_value = LE_CACHEABILITY(LE_UC) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c17d63d..a5d116f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -62,6 +62,30 @@ extern "C" {
  #define I915_ERROR_UEVENT "ERROR"
  #define I915_RESET_UEVENT "RESET"

+/*
+ * MOCS indexes used for GPU surfaces, defining the cacheability of the
+ * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
+ */
+enum i915_mocs_table_index {
+   /*
+* Not cached anywhere, coherency between CPU and GPU accesses is
+* guaranteed.
+*/
+   I915_MOCS_UNCACHED,
+   /*
+* Cacheability and coherency controlled by the kernel automatically
+* based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
+* usage of the surface (used for display scanout or not).
+*/
+   I915_MOCS_AUTO,
+   /*
+* Cached in all GPU caches available on the platform.
+* Coherency between CPU and GPU accesses to the surface is not
+* guaranteed without extra synchronization.
+*/


IMO the coherency is guaranteed without extra synchronization for the 
MOCS_CACHED.



+   I915_MOCS_CACHED,
+};
+
  /* Each region is a minimum of 16k, and there are at most 255 of them.
   */
  #define I915_NR_TEX_REGIONS 255   /* table size 2k - maximum due to use


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 1/3] drm/i915/gen9: Clean up MOCS table definitions

2016-07-12 Thread Zhao Yakui

On 07/01/2016 09:40 PM, Deak, Imre wrote:

Use named struct initializers for clarity. Also fix the target cache
definition to reflect its role in GEN9 onwards. On GEN8 a TC value of 0
meant ELLC but on GEN9+ it means the TC and LRU controls are taken from
the PTE.

No functional change, igt/gem_mocs_settings still passing after this
change.

v2: (Chris)
- Add back the hexa literals for the entries.
   Add note that igt/gem_mocs_settings still passes.

CC: Rong R Yang<rong.r.y...@intel.com>
CC: Yakui Zhao<yakui.z...@intel.com>
CC: Chris Wilson<ch...@chris-wilson.co.uk>
Signed-off-by: Imre Deak<imre.d...@intel.com>


It is helpful to understand the MOCS table definition after cleaning up.

Add: Acked-by: Zhao Yakui <yakui.z...@intel.com>

Thanks
  Yakui


---
  drivers/gpu/drm/i915/intel_mocs.c | 88 +++
  1 file changed, 61 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 3c1482b..d36e609 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -66,9 +66,10 @@ struct drm_i915_mocs_table {
  #define L3_WB 3

  /* Target cache */
-#define ELLC   0
-#define LLC1
-#define LLC_ELLC   2
+#define LE_TC_PAGETABLE0
+#define LE_TC_LLC  1
+#define LE_TC_LLC_ELLC 2
+#define LE_TC_LLC_ELLC_ALT 3

  /*
   * MOCS tables
@@ -96,34 +97,67 @@ struct drm_i915_mocs_table {
   *   end.
   */
  static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-   /* { 0x0009, 0x0010 } */
-   { (LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(0) |
-  LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)),
- (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC)) },
-   /* { 0x0038, 0x0030 } */
-   { (LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) |
-  LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)),
- (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) },
-   /* { 0x003b, 0x0030 } */
-   { (LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) |
-  LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)),
- (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) }
+   { /* 0x0009 */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0),
+
+ /* 0x0010 */
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   {
+ /* 0x0038 */
+ .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
+  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0),
+ /* 0x0030 */
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   {
+ /* 0x003b */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0),
+ /* 0x0030 */
+ .l3cc_value =   L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
  };

  /* NOTE: the LE_TGT_CACHE is not used on Broxton */
  static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-   /* { 0x0009, 0x0010 } */
-   { (LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(0) |
-  LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)),
- (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC)) },
-   /* { 0x0038, 0x0030 } */
-   { (LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) |
-  LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)),
- (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) },
-   /* { 0x003b, 0x0030 } */
-   { (LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) |
-  LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)),
- (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) }
+   {
+ /* 0x0009 */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0),
+
+ /* 0x0010 */
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   {
+ /* 0x0038 */
+ .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
+  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
+  LE_LRUM(3) | LE

Re: [Intel-gfx] [PATCH 1/7] drm/i915: Specify bsd rings through exec flag

2014-12-10 Thread Zhao, Yakui
On Wed, 2014-12-10 at 08:55 -0700, Dave Gordon wrote:
 On 10/12/14 09:11, Daniel Vetter wrote:
  On Wed, Dec 10, 2014 at 02:18:15AM +, Gong, Zhipeng wrote:
  On Tue, 2014-12-09 at 10:46 +0100, Daniel Vetter wrote:
  On Mon, Dec 08, 2014 at 01:55:56PM -0800, Rodrigo Vivi wrote:
 
 [snip]
 
  diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
  b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  index e1ed85a..d9081ec 100644
  --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  @@ -1273,8 +1273,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, 
  void *data,
else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
if (HAS_BSD2(dev)) {
int ring_id;
  - ring_id = gen8_dispatch_bsd_ring(dev, file);
  - ring = dev_priv-ring[ring_id];
  +
  + switch (args-flags  I915_EXEC_BSD_MASK) {
  + case I915_EXEC_BSD_DEFAULT:
  + ring_id = gen8_dispatch_bsd_ring(dev, 
  file);
  + ring = dev_priv-ring[ring_id];
  + break;
  + case I915_EXEC_BSD_RING1:
  + ring = dev_priv-ring[VCS];
 
  Do we have any use-case for selecting ring1 specifically? I've thought
  it's only ring2 that is special?
  The HEVC GPU commands should be dispatched to BSD RING 1 instead of BSD
  RING2 as the two rings are asymmetrical. 
  For the H264 decoding/encoding either ring is OK.
  
  Well then same arguments applies with ring2 since only ring1 is special?
  It's just to minimize abi and reduce the amount of rope we hand to
  userspace.
 
 Anyone who knows to use any of these flags is taking responsibility for
 doing explicit engine allocation, so why not give them all the options
 -- if for no other reason, more symmetry is good.

Agree with Dave's point. The override flag is motivated by the SKL GT3
platform, which requires that HEVC GPU commands be dispatched explicitly
to BSD ring1, as the two BSD rings are not symmetric. The override flag
also gives the user-space app/driver more flexibility to explicitly
choose which BSD ring is used to dispatch video GPU commands, instead of
relying on the kernel's ping-pong mode. This benefits platforms with two
BSD rings.

 
 As an example, there could be a case where userspace knows better than
 the kernel how long each batch will take, and can predict an optimal
 allocation pattern rather than just flip-flopping. So even when a batch
 *can* run on either engine, there might be a reason to pick a specific one.
 
 e.g.  short-1 - ring 1
   short-2 - ring 1
   long-1  - ring 2
   short-3 - ring 1
   long-2  - ring 1
 
 because the program knows that the three short batches together will
 take less time than the one first long one.
 
 .Dave.
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH I-g-t 1/2] Rendercopy/skl: Remove redundant field to fix GPU hang

2014-11-06 Thread Zhao Yakui
After commit 982f7eb238a0898c456e0574dee7c4507738d75f was applied, OUT_RELOC
was updated on Broadwell and later to handle the 64-bit gfx address field
internally. As a result, some commands need to be fixed; otherwise a GPU hang
will be triggered when running rendercopy.
(It is already fixed on Broadwell)

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 lib/rendercopy_gen9.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index 9ff4b3a..e20a84f 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -397,7 +397,6 @@ static void gen7_emit_vertex_buffer(struct 
intel_batchbuffer *batch,
  GEN7_VB0_BUFFER_ADDR_MOD_EN | /* Address Modify Enable */
  VERTEX_SIZE  VB0_BUFFER_PITCH_SHIFT);
OUT_RELOC(batch-bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
-   OUT_BATCH(0);
OUT_BATCH(3 * VERTEX_SIZE);
 }
 
@@ -523,12 +522,10 @@ gen9_emit_state_base_address(struct intel_batchbuffer 
*batch) {
 
/* surface */
OUT_RELOC(batch-bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-   OUT_BATCH(0);
 
/* dynamic */
OUT_RELOC(batch-bo, I915_GEM_DOMAIN_RENDER | 
I915_GEM_DOMAIN_INSTRUCTION,
  0, BASE_ADDRESS_MODIFY);
-   OUT_BATCH(0);
 
/* indirect */
OUT_BATCH(0);
@@ -536,7 +533,6 @@ gen9_emit_state_base_address(struct intel_batchbuffer 
*batch) {
 
/* instruction */
OUT_RELOC(batch-bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 
BASE_ADDRESS_MODIFY);
-   OUT_BATCH(0);
 
/* general state buffer size */
OUT_BATCH(0xf000 | 1);
-- 
1.8.5.dirty

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH I-g-t 2/2] Mediafill/skl: Remove redundant field to fix GPU hang

2014-11-06 Thread Zhao Yakui
After commit 982f7eb238a0898c456e0574dee7c4507738d75f was applied, OUT_RELOC
was updated on Broadwell and later to handle the 64-bit gfx address field
internally. As a result, some commands need to be fixed; otherwise a GPU hang
will be triggered when running gem_media_fill.
(It is already fixed on Broadwell)

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 lib/media_fill_gen9.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index 6c6ba89..3fd2181 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -205,12 +205,10 @@ gen9_emit_state_base_address(struct intel_batchbuffer 
*batch)
 
/* surface */
OUT_RELOC(batch-bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-   OUT_BATCH(0);
 
/* dynamic */
OUT_RELOC(batch-bo, I915_GEM_DOMAIN_RENDER | 
I915_GEM_DOMAIN_INSTRUCTION,
0, BASE_ADDRESS_MODIFY);
-   OUT_BATCH(0);
 
/* indirect */
OUT_BATCH(0);
@@ -218,7 +216,6 @@ gen9_emit_state_base_address(struct intel_batchbuffer 
*batch)
 
/* instruction */
OUT_RELOC(batch-bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 
BASE_ADDRESS_MODIFY);
-   OUT_BATCH(0);
 
/* general state buffer size */
OUT_BATCH(0xf000 | 1);
-- 
1.8.5.dirty

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Specify bsd rings through exec flag

2014-08-05 Thread Zhao, Yakui
On Tue, 2014-08-05 at 02:44 -0600, Daniel Vetter wrote:
 On Tue, Aug 05, 2014 at 03:54:04PM +0800, Zhipeng Gong wrote:
  On Broadwell GT3 we have 2 Video Command Streamers (VCS), but userspace
  has no control when using VCS1 or VCS2. This patch introduces a mechanism
  to avoid the default ping-pong mode and use one specific ring through
  execution flag.
  
  Signed-off-by: Zhipeng Gong zhipeng.g...@intel.com
 
 This needs an open-source user and proper justification why we need this.
 On bdw. Iirc the only users is content protection which isn't open-source
 due to the usual concerns, so if that hasn't changed this patch is
 rejected.

Hi, Daniel

The open-source media driver also needs this feature for BDW. Now we
are planning to add the following function that depends on this flag for
BDW with two BSD rings.
After a GPU hang occurs on a BSD ring during decoding, the media driver needs
to specify the corresponding BSD ring in order to read the decoding status
registers associated with that ring.
 
  Can this be regarded as one open-source usage scenario?

 
 Also you'd need igt tests and all that too.

I agree. The igt test is needed.

Maybe one patch is missing that exposes the flag of dual BSD rings.
Only when the flag exists, we can specify which bsd ring to dispatch the
BSD video command.

 -Daniel
 
  ---
   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +--
   include/uapi/drm/i915_drm.h|  8 +++-
   2 files changed, 24 insertions(+), 3 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
  b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  index 60998fc..f9ed8e0 100644
  --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  @@ -1279,8 +1279,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
  *data,
  else if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
  if (HAS_BSD2(dev)) {
  int ring_id;
  -   ring_id = gen8_dispatch_bsd_ring(dev, file);
  -   ring = dev_priv-ring[ring_id];
  +
  +   switch (args-flags  I915_EXEC_BSD_MASK) {
  +   case I915_EXEC_BSD_DEFAULT:
  +   ring_id = gen8_dispatch_bsd_ring(dev, file);
  +   ring = dev_priv-ring[ring_id];
  +   break;
  +   case I915_EXEC_BSD_RING1:
  +   ring = dev_priv-ring[VCS];
  +   break;
  +   case I915_EXEC_BSD_RING2:
  +   ring = dev_priv-ring[VCS2];
  +   break;
  +   default:
  +   DRM_DEBUG(execbuf with unknown bsd ring: %d\n,
  + (int)(args-flags  
  I915_EXEC_BSD_MASK));
  +   return -EINVAL;
  +   }
  } else
  ring = dev_priv-ring[VCS];
  } else
  diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
  index ff57f07..421420a 100644
  --- a/include/uapi/drm/i915_drm.h
  +++ b/include/uapi/drm/i915_drm.h
  @@ -736,7 +736,13 @@ struct drm_i915_gem_execbuffer2 {
*/
   #define I915_EXEC_HANDLE_LUT   (112)
   
  -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT1)
  +/** Used for switching BSD rings on the platforms with two BSD rings */
  +#define I915_EXEC_BSD_MASK (313)
  +#define I915_EXEC_BSD_DEFAULT  (013) /* default ping-pong 
  mode */
  +#define I915_EXEC_BSD_RING1(113)
  +#define I915_EXEC_BSD_RING2(213)
  +
  +#define __I915_EXEC_UNKNOWN_FLAGS -(115)
   
   #define I915_EXEC_CONTEXT_ID_MASK  (0x)
   #define i915_execbuffer2_set_context_id(eb2, context) \
  -- 
  2.0.3
  
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/3] drm/i915: Introduce dual_bsd_ring parameter.

2014-06-30 Thread Zhao, Yakui
On Mon, 2014-06-30 at 10:51 -0600, Rodrigo Vivi wrote:
 On Broadwell GT3 we have 2 Video Command Streamers (VCS),
 but userspace has no control when using VCS1 or VCS2. So we cannot test,
 validate or debug specific changes or workaround that might affect only
 one or another ring. So this patch introduces a mechanism to avoid the
 ping-pong selection and use one specific ring given at boot time.

   If it is mainly used for the test/validation, can we add one override
flag so that the user-space app can explicitly declare which BSD ring is
used to dispatch the corresponding BSD commands? In such case it will
force to dispatch the corresponding commands on the ring passed by
user-application.

   At the same time this patch is not helpful under the following
scenario. For example: One application hopes to use the BSD Ring 0 while
another application hopes to use the BSD ring 1. 

 
 Signed-off-by: Rodrigo Vivi rodrigo.v...@intel.com
 ---
  drivers/gpu/drm/i915/i915_drv.h|  1 +
  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 
 ++
  drivers/gpu/drm/i915/i915_params.c |  6 ++
  3 files changed, 27 insertions(+), 14 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 8cea596..7b6614f 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -2069,6 +2069,7 @@ struct i915_params {
   int panel_ignore_lid;
   unsigned int powersave;
   int semaphores;
 + int dual_bsd_ring;
   unsigned int lvds_downclock;
   int lvds_channel_mode;
   int panel_use_ssc;
 diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
 b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 index d815ef5..09f350e 100644
 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 @@ -1035,26 +1035,32 @@ static int gen8_dispatch_bsd_ring(struct drm_device 
 *dev,
  {
   struct drm_i915_private *dev_priv = dev-dev_private;
   struct drm_i915_file_private *file_priv = file-driver_priv;
 + int ring_id;
 + int dual = i915.dual_bsd_ring;
  
   /* Check whether the file_priv is using one ring */
   if (file_priv-bsd_ring)
   return file_priv-bsd_ring-id;
 - else {
 - /* If no, use the ping-pong mechanism to select one ring */
 - int ring_id;
  
 - mutex_lock(dev-struct_mutex);
 - if (dev_priv-mm.bsd_ring_dispatch_index == 0) {
 - ring_id = VCS;
 - dev_priv-mm.bsd_ring_dispatch_index = 1;
 - } else {
 - ring_id = VCS2;
 - dev_priv-mm.bsd_ring_dispatch_index = 0;
 - }
 - file_priv-bsd_ring = dev_priv-ring[ring_id];
 - mutex_unlock(dev-struct_mutex);
 - return ring_id;
 + /* If no, use the parameter defined or ping-pong mechanism
 +  * to select one ring */
 + mutex_lock(dev-struct_mutex);
 +
 + if (dual == 1 || (dual != 2 
 +   dev_priv-mm.bsd_ring_dispatch_index == 0)) {
 + ring_id = VCS;
 + dev_priv-mm.bsd_ring_dispatch_index = 1;
 + } else {
 + ring_id = VCS2;
 + dev_priv-mm.bsd_ring_dispatch_index = 0;
   }
 +
 + file_priv-bsd_ring = dev_priv-ring[ring_id];
 + mutex_unlock(dev-struct_mutex);
 +
 + WARN(dual, Forcibly trying to use only one bsd ring. Using: %s\n,
 +  file_priv-bsd_ring-name);
 + return ring_id;
  }
  
  static struct drm_i915_gem_object *
 diff --git a/drivers/gpu/drm/i915/i915_params.c 
 b/drivers/gpu/drm/i915/i915_params.c
 index 8145729..d4871c8 100644
 --- a/drivers/gpu/drm/i915/i915_params.c
 +++ b/drivers/gpu/drm/i915/i915_params.c
 @@ -29,6 +29,7 @@ struct i915_params i915 __read_mostly = {
   .panel_ignore_lid = 1,
   .powersave = 1,
   .semaphores = -1,
 + .dual_bsd_ring = 0,
   .lvds_downclock = 0,
   .lvds_channel_mode = 0,
   .panel_use_ssc = -1,
 @@ -70,6 +71,11 @@ MODULE_PARM_DESC(semaphores,
   Use semaphores for inter-ring sync 
   (default: -1 (use per-chip defaults)));
  
 +module_param_named(dual_bsd_ring, i915.dual_bsd_ring, int, 0600);
 +MODULE_PARM_DESC(dual_bsd_ring,
 +  Specify bds rings for VCS when there are multiple VCSs available.
 +  (0=All available bsd rings [default], 1=only VCS1, 2=only VCS2));
 +
  module_param_named(enable_rc6, i915.enable_rc6, int, 0400);
  MODULE_PARM_DESC(enable_rc6,
   Enable power-saving render C-state 6. 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Ringbuffer signal func for the second BSD ring

2014-05-12 Thread Zhao, Yakui
On Mon, 2014-05-12 at 13:04 -0600, Daniel Vetter wrote:
 On Fri, May 09, 2014 at 01:44:59PM +0100, oscar.ma...@intel.com wrote:
  From: Oscar Mateo oscar.ma...@intel.com
  
  This is missing in:
  
  commit 78325f2d270897c9ee0887125b7abb963eb8efea
  Author: Ben Widawsky benjamin.widaw...@intel.com
  Date:   Tue Apr 29 14:52:29 2014 -0700
  
  drm/i915: Virtualize the ringbuffer signal func
  
  Looks to me like a rebase side-effect...
  
  Signed-off-by: Oscar Mateo oscar.ma...@intel.com
 
 Queued for -next, thanks for the patch. Iirc there's been a regression
 report too. Anyone know the bugzilla?

It seems that this patch can fix the issue in:

https://bugs.freedesktop.org/show_bug.cgi?id=78274


Thanks.
   Yakui

 -Daniel
 
  ---
   drivers/gpu/drm/i915/intel_ringbuffer.c | 1 +
   1 file changed, 1 insertion(+)
  
  diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
  b/drivers/gpu/drm/i915/intel_ringbuffer.c
  index 9907d66..203fa2b 100644
  --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
  +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
  @@ -2182,6 +2182,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device 
  *dev)
  ring-dispatch_execbuffer =
  gen8_ring_dispatch_execbuffer;
  ring-semaphore.sync_to = gen6_ring_sync;
  +   ring-semaphore.signal = gen6_signal;
  /*
   * The current semaphore is only applied on the pre-gen8. And there
   * is no bsd2 ring on the pre-gen8. So now the semaphore_register
  -- 
  1.9.0
  
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] tests: Add gem_exec_params

2014-04-24 Thread Zhao Yakui
On Wed, 2014-04-23 at 12:32 -0600, Daniel Vetter wrote:
 This fills all the gaps we've had in our execbuf testing. Overflow
 testing of the various arrays is already done by gem_reloc_overflow.
 
 Also add kms_flip_tiling to .gitignore.
 
 This will cause a bunch of failures since current kernels don't catch
 all fallout.
 

Very good patch. Except for some small concerns, it looks OK to me.

 Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch
 ---
  tests/.gitignore|   2 +
  tests/Makefile.sources  |   1 +
  tests/gem_exec_params.c | 212 
 
  3 files changed, 215 insertions(+)
  create mode 100644 tests/gem_exec_params.c
 
 diff --git a/tests/.gitignore b/tests/.gitignore
 index 146bab06b565..4c50bae93aa3 100644
 --- a/tests/.gitignore
 +++ b/tests/.gitignore
 @@ -35,6 +35,7 @@ gem_exec_blt
  gem_exec_faulting_reloc
  gem_exec_lut_handle
  gem_exec_nop
 +gem_exec_params
  gem_exec_parse
  gem_fd_exhaustion
  gem_fenced_exec_thrash
 @@ -113,6 +114,7 @@ kms_addfb
  kms_cursor_crc
  kms_fbc_crc
  kms_flip
 +kms_flip_tiling
  kms_pipe_crc_basic
  kms_plane
  kms_render
 diff --git a/tests/Makefile.sources b/tests/Makefile.sources
 index c957ace2ace0..9b2d7cff1113 100644
 --- a/tests/Makefile.sources
 +++ b/tests/Makefile.sources
 @@ -29,6 +29,7 @@ TESTS_progs_M = \
   gem_exec_bad_domains \
   gem_exec_faulting_reloc \
   gem_exec_nop \
 + gem_exec_params \
   gem_exec_parse \
   gem_fenced_exec_thrash \
   gem_fence_thrash \
 diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c
 new file mode 100644
 index ..b1d996c530f5
 --- /dev/null
 +++ b/tests/gem_exec_params.c
 @@ -0,0 +1,212 @@
 +/*
 + * Copyright (c) 2014 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the Software),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 DEALINGS
 + * IN THE SOFTWARE.
 + *
 + * Authors:
 + *Daniel Vetter
 + *
 + */
 +
 +#include unistd.h
 +#include stdlib.h
 +#include stdint.h
 +#include stdio.h
 +#include string.h
 +#include fcntl.h
 +#include inttypes.h
 +#include errno.h
 +#include sys/stat.h
 +#include sys/ioctl.h
 +#include sys/time.h
 +#include drm.h
 +
 +#include ioctl_wrappers.h
 +#include drmtest.h
 +#include intel_io.h
 +#include intel_chipset.h
 +#include igt_aux.h
 +
 +#define LOCAL_I915_EXEC_VEBOX (40)
 +
 +struct drm_i915_gem_execbuffer2 execbuf;
 +struct drm_i915_gem_exec_object2 gem_exec[1];
 +uint32_t batch[2] = {MI_BATCH_BUFFER_END};
 +uint32_t handle, devid;
 +int fd;
 +
 +igt_main
 +{
 + igt_fixture {
 + fd = drm_open_any();
 +
 + devid = intel_get_drm_devid(fd);
 +
 + handle = gem_create(fd, 4096);
 + gem_write(fd, handle, 0, batch, sizeof(batch));
 +
 + gem_exec[0].handle = handle;
 + gem_exec[0].relocation_count = 0;
 + gem_exec[0].relocs_ptr = 0;
 + gem_exec[0].alignment = 0;
 + gem_exec[0].offset = 0;
 + gem_exec[0].flags = 0;
 + gem_exec[0].rsvd1 = 0;
 + gem_exec[0].rsvd2 = 0;
 +
 + execbuf.buffers_ptr = (uintptr_t)gem_exec;
 + execbuf.buffer_count = 1;
 + execbuf.batch_start_offset = 0;
 + execbuf.batch_len = 8;

Can we use sizeof(batch) instead of 8?

 + execbuf.cliprects_ptr = 0;
 + execbuf.num_cliprects = 0;
 + execbuf.DR1 = 0;
 + execbuf.DR4 = 0;
 + execbuf.flags = 0;
 + i915_execbuffer2_set_context_id(execbuf, 0);
 + execbuf.rsvd2 = 0;
 + }
 +
 + igt_subtest(control) {
 + igt_assert(drmIoctl(fd,
 + DRM_IOCTL_I915_GEM_EXECBUFFER2,
 + execbuf) == 0);
 + execbuf.flags = I915_EXEC_RENDER;
 + igt_assert(drmIoctl(fd,
 + DRM_IOCTL_I915_GEM_EXECBUFFER2,
 +

Re: [Intel-gfx] [PATCH] tests: Add gem_exec_params

2014-04-24 Thread Zhao Yakui
On Thu, 2014-04-24 at 01:18 -0600, Daniel Vetter wrote:
 On Thu, Apr 24, 2014 at 8:43 AM, Zhao Yakui yakui.z...@intel.com wrote:
  On Wed, 2014-04-23 at 12:32 -0600, Daniel Vetter wrote:
  This fills all the gaps we've had in our execbuf testing. Overflow
  testing of the various arrays is already done by gem_reloc_overflow.
 
  Also add kms_flip_tiling to .gitignore.
 
  This will cause a bunch of failures since current kernels don't catch
  all fallout.
 
 
  Very good patch. Except some small concerns, it is OK to me.
 
 Thanks for your comments, replies below.
 -Daniel
 
 
  Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch
  ---
   tests/.gitignore|   2 +
   tests/Makefile.sources  |   1 +
   tests/gem_exec_params.c | 212 
  
   3 files changed, 215 insertions(+)
   create mode 100644 tests/gem_exec_params.c
 
  diff --git a/tests/.gitignore b/tests/.gitignore
  index 146bab06b565..4c50bae93aa3 100644
  --- a/tests/.gitignore
  +++ b/tests/.gitignore
  @@ -35,6 +35,7 @@ gem_exec_blt
   gem_exec_faulting_reloc
   gem_exec_lut_handle
   gem_exec_nop
  +gem_exec_params
   gem_exec_parse
   gem_fd_exhaustion
   gem_fenced_exec_thrash
  @@ -113,6 +114,7 @@ kms_addfb
   kms_cursor_crc
   kms_fbc_crc
   kms_flip
  +kms_flip_tiling
   kms_pipe_crc_basic
   kms_plane
   kms_render
  diff --git a/tests/Makefile.sources b/tests/Makefile.sources
  index c957ace2ace0..9b2d7cff1113 100644
  --- a/tests/Makefile.sources
  +++ b/tests/Makefile.sources
  @@ -29,6 +29,7 @@ TESTS_progs_M = \
gem_exec_bad_domains \
gem_exec_faulting_reloc \
gem_exec_nop \
  + gem_exec_params \
gem_exec_parse \
gem_fenced_exec_thrash \
gem_fence_thrash \
  diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c
  new file mode 100644
  index ..b1d996c530f5
  --- /dev/null
  +++ b/tests/gem_exec_params.c
  @@ -0,0 +1,212 @@
  +/*
  + * Copyright (c) 2014 Intel Corporation
  + *
  + * Permission is hereby granted, free of charge, to any person obtaining a
  + * copy of this software and associated documentation files (the 
  Software),
  + * to deal in the Software without restriction, including without 
  limitation
  + * the rights to use, copy, modify, merge, publish, distribute, 
  sublicense,
  + * and/or sell copies of the Software, and to permit persons to whom the
  + * Software is furnished to do so, subject to the following conditions:
  + *
  + * The above copyright notice and this permission notice (including the 
  next
  + * paragraph) shall be included in all copies or substantial portions of 
  the
  + * Software.
  + *
  + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, 
  EXPRESS OR
  + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  MERCHANTABILITY,
  + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 
  SHALL
  + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
  OTHER
  + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
  DEALINGS
  + * IN THE SOFTWARE.
  + *
  + * Authors:
  + *Daniel Vetter
  + *
  + */
  +
  +#include unistd.h
  +#include stdlib.h
  +#include stdint.h
  +#include stdio.h
  +#include string.h
  +#include fcntl.h
  +#include inttypes.h
  +#include errno.h
  +#include sys/stat.h
  +#include sys/ioctl.h
  +#include sys/time.h
  +#include drm.h
  +
  +#include ioctl_wrappers.h
  +#include drmtest.h
  +#include intel_io.h
  +#include intel_chipset.h
  +#include igt_aux.h
  +
  +#define LOCAL_I915_EXEC_VEBOX (40)
  +
  +struct drm_i915_gem_execbuffer2 execbuf;
  +struct drm_i915_gem_exec_object2 gem_exec[1];
  +uint32_t batch[2] = {MI_BATCH_BUFFER_END};
  +uint32_t handle, devid;
  +int fd;
  +
  +igt_main
  +{
  + igt_fixture {
  + fd = drm_open_any();
  +
  + devid = intel_get_drm_devid(fd);
  +
  + handle = gem_create(fd, 4096);
  + gem_write(fd, handle, 0, batch, sizeof(batch));
  +
  + gem_exec[0].handle = handle;
  + gem_exec[0].relocation_count = 0;
  + gem_exec[0].relocs_ptr = 0;
  + gem_exec[0].alignment = 0;
  + gem_exec[0].offset = 0;
  + gem_exec[0].flags = 0;
  + gem_exec[0].rsvd1 = 0;
  + gem_exec[0].rsvd2 = 0;
  +
  + execbuf.buffers_ptr = (uintptr_t)gem_exec;
  + execbuf.buffer_count = 1;
  + execbuf.batch_start_offset = 0;
  + execbuf.batch_len = 8;
 
  Can we use the sizeof(batch) instead of 8?
 
 We use noop batches like this all over the place and it's kinda all
 hard-coded magic numbers. Constructing execbufs manually is one of
 those areas in igt which are rather painful, but thus far I just
 didn't come up with a nice approach to it.
 
 Hence I think leaving all the brittle magic

Re: [Intel-gfx] [PATCH V4 3/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine

2014-04-24 Thread Zhao Yakui
On Thu, 2014-04-24 at 09:21 -0600, Daniel Vetter wrote:
 On Thu, Apr 17, 2014 at 10:37:37AM +0800, Zhao Yakui wrote:
  Based on the hardware spec, the BDW GT3 machine has two independent
  BSD rings that can be used to dispatch the video commands.
  So just initialize it.
  
  V3-V4: Follow Imre's comment to do some minor updates. For example:
  more comments are added to describe the semaphore between ring.
  
  Reviewed-by: Imre Deak imre.d...@intel.com
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
  ---
 [snip]
 
  +/**
  + * Initialize the second BSD ring for Broadwell GT3.
  + * It is noted that this only exists on Broadwell GT3.
  + */
  +int intel_init_bsd2_ring_buffer(struct drm_device *dev)
  +{
  +   struct drm_i915_private *dev_priv = dev-dev_private;
  +   struct intel_ring_buffer *ring = dev_priv-ring[VCS2];
  +
  +   if ((INTEL_INFO(dev)-gen != 8) ) {
 
 Checkpatch complained about the space here, I've fixed it up quickly.
 Running checkpatch before submitting patches is highly recommended so that
 we all nicely follow the official linux kernel Coding Style.

Thanks for the suggestion.
I will pay attention to this next time.

Thanks.
Yakui

 
 Cheers, Daniel


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH V4 3/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine

2014-04-24 Thread Zhao Yakui
On Tue, 2014-04-22 at 13:52 -0600, Daniel Vetter wrote:
 On Thu, Apr 17, 2014 at 10:37:37AM +0800, Zhao Yakui wrote:
  Based on the hardware spec, the BDW GT3 machine has two independent
  BSD rings that can be used to dispatch the video commands.
  So just initialize it.
  
  V3-V4: Follow Imre's comment to do some minor updates. For example:
  more comments are added to describe the semaphore between ring.
 
 Within a patch series we usually keep revisions for each patch separately,
 so this would only be v2 for this patch.
 
 Once a patch is merged, people won't ever look at it in the context of your
 entire series, but just as an individual patch. If your in-patch commit
 log directly jumps to v4 from v1 then people are left wondering what
 happened to v2 and v3 ;-)
 
 Anyway just a small nit for the next patch series.

Good advice.

I will pay attention to this next time.

Thanks.
Yakui

 -Daniel
 
  
  Reviewed-by: Imre Deak imre.d...@intel.com
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
  ---
   drivers/gpu/drm/i915/i915_drv.c |4 +-
   drivers/gpu/drm/i915/i915_drv.h |2 +
   drivers/gpu/drm/i915/i915_gem.c |9 +++-
   drivers/gpu/drm/i915/i915_gpu_error.c   |1 +
   drivers/gpu/drm/i915/i915_reg.h |1 +
   drivers/gpu/drm/i915/intel_ringbuffer.c |   78 
  +++
   drivers/gpu/drm/i915/intel_ringbuffer.h |4 +-
   7 files changed, 95 insertions(+), 4 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_drv.c 
  b/drivers/gpu/drm/i915/i915_drv.c
  index 17fbbe5..2a7842b 100644
  --- a/drivers/gpu/drm/i915/i915_drv.c
  +++ b/drivers/gpu/drm/i915/i915_drv.c
  @@ -282,7 +282,7 @@ static const struct intel_device_info 
  intel_broadwell_m_info = {
   static const struct intel_device_info intel_broadwell_gt3d_info = {
  .gen = 8, .num_pipes = 3,
  .need_gfx_hws = 1, .has_hotplug = 1,
  -   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
  +   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
  .has_llc = 1,
  .has_ddi = 1,
  .has_fbc = 1,
  @@ -292,7 +292,7 @@ static const struct intel_device_info 
  intel_broadwell_gt3d_info = {
   static const struct intel_device_info intel_broadwell_gt3m_info = {
  .gen = 8, .is_mobile = 1, .num_pipes = 3,
  .need_gfx_hws = 1, .has_hotplug = 1,
  -   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
  +   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
  .has_llc = 1,
  .has_ddi = 1,
  .has_fbc = 1,
  diff --git a/drivers/gpu/drm/i915/i915_drv.h 
  b/drivers/gpu/drm/i915/i915_drv.h
  index 92c3095..74aef6a 100644
  --- a/drivers/gpu/drm/i915/i915_drv.h
  +++ b/drivers/gpu/drm/i915/i915_drv.h
  @@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table {
   #define BSD_RING   (1VCS)
   #define BLT_RING   (1BCS)
   #define VEBOX_RING (1VECS)
  +#define BSD2_RING  (1VCS2)
   #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask  BSD_RING)
  +#define HAS_BSD2(dev)  (INTEL_INFO(dev)-ring_mask  BSD2_RING)
   #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask  BLT_RING)
   #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask  VEBOX_RING)
   #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc)
  diff --git a/drivers/gpu/drm/i915/i915_gem.c 
  b/drivers/gpu/drm/i915/i915_gem.c
  index 85c9cf0..65c441c 100644
  --- a/drivers/gpu/drm/i915/i915_gem.c
  +++ b/drivers/gpu/drm/i915/i915_gem.c
  @@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device 
  *dev)
  goto cleanup_blt_ring;
  }
   
  +   if (HAS_BSD2(dev)) {
  +   ret = intel_init_bsd2_ring_buffer(dev);
  +   if (ret)
  +   goto cleanup_vebox_ring;
  +   }
   
  ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
  if (ret)
  -   goto cleanup_vebox_ring;
  +   goto cleanup_bsd2_ring;
   
  return 0;
   
  +cleanup_bsd2_ring:
  +   intel_cleanup_ring_buffer(dev_priv-ring[VCS2]);
   cleanup_vebox_ring:
  intel_cleanup_ring_buffer(dev_priv-ring[VECS]);
   cleanup_blt_ring:
  diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
  b/drivers/gpu/drm/i915/i915_gpu_error.c
  index 4865ade..282164c 100644
  --- a/drivers/gpu/drm/i915/i915_gpu_error.c
  +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
  @@ -42,6 +42,7 @@ static const char *ring_str(int ring)
  case VCS: return bsd;
  case BCS: return blt;
  case VECS: return vebox;
  +   case VCS2: return bsd2;
  default: return ;
  }
   }
  diff --git a/drivers/gpu/drm/i915/i915_reg.h 
  b/drivers/gpu/drm/i915/i915_reg.h
  index 8f84555..0b88508 100644
  --- a/drivers/gpu/drm/i915/i915_reg.h
  +++ b/drivers/gpu/drm/i915/i915_reg.h
  @@ -760,6 +760,7 @@ enum punit_power_well {
   #define RENDER_RING_BASE   0x02000
   #define BSD_RING_BASE  0x04000
   #define GEN6_BSD_RING_BASE 0x12000
  +#define

[Intel-gfx] [PATCH I-g-t V4 0/2] Tests: Add test cases based on multi drm_fd to test sync

2014-04-23 Thread Zhao Yakui
This follows Daniel's advice to add the two test cases based on multi drm_fd to 
test the ring sync and CPU-GPU sync.
The Broadwell GT3 machine has two independent BSD rings that can be used
to process the video commands. This is implemented in kernel driver and 
transparent
to the user-space. But we still need to check the ring sync and CPU-GPU sync 
for
the second BSD ring. Two tests are created based on the multi drm_fds to
test the sync. Multi drm_fd can assure that the second BSD ring has the 
opportunity
to dispatch the GPU command. 

V1-V2: Follow Daniel's comment to add one subtest instead of one individual
test case, which is used to test the CPU-GPU sync under multi BSD rings.

V2-V3: Follow Imre's comment to remove the unnecessary initialization and
use igt_assert_f instead of igt_assert


Zhao Yakui (2):
  tests: Add one ring sync case based on multi drm_fd to test ring
semaphore sync under multi BSD rings
  tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to
test CPU-GPU sync under multi BSD rings

 tests/.gitignore|1 +
 tests/Makefile.sources  |1 +
 tests/gem_dummy_reloc_loop.c|  107 +++-
 tests/gem_multi_bsd_sync_loop.c |  175 +++
 4 files changed, 283 insertions(+), 1 deletion(-)
 create mode 100644 tests/gem_multi_bsd_sync_loop.c

-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH I-g-t 2/2] tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings

2014-04-23 Thread Zhao Yakui
The Broadwell GT3 machine has two independent BSD rings in kernel driver while
it is transparent to the user-space driver. In such case it needs to check
the CPU-GPU sync for the second BSD ring.

V1-V2: Follow Daniel's comment to add one subtest instead of one individual
test case, which is used to test the CPU-GPU sync under multi BSD rings.

V2-V3: Follow Imre's comment to remove the unnecessary initialization and
use igt_assert_f instead of igt_assert

Reviewed-by: Imre Deak imre.d...@intel.com
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 tests/gem_dummy_reloc_loop.c |  107 +-
 1 file changed, 106 insertions(+), 1 deletion(-)

diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c
index a61b59b..4e4dd49 100644
--- a/tests/gem_dummy_reloc_loop.c
+++ b/tests/gem_dummy_reloc_loop.c
@@ -48,6 +48,13 @@ static drm_intel_bufmgr *bufmgr;
 struct intel_batchbuffer *batch;
 static drm_intel_bo *target_buffer;
 
+#define NUM_FD 50
+
+static int mfd[NUM_FD];
+static drm_intel_bufmgr *mbufmgr[NUM_FD];
+static struct intel_batchbuffer *mbatch[NUM_FD];
+static drm_intel_bo *mbuffer[NUM_FD];
+
 /*
  * Testcase: Basic check of ring-cpu sync using a dummy reloc
  *
@@ -124,6 +131,50 @@ dummy_reloc_loop_random_ring(int num_rings)
}
 }
 
+static void
+dummy_reloc_loop_random_ring_multi_fd(int num_rings)
+{
+   int i;
+   struct intel_batchbuffer *saved_batch;
+
+   saved_batch = batch;
+
+   srandom(0xdeadbeef);
+
+   for (i = 0; i  0x10; i++) {
+   int mindex;
+   int ring = random() % num_rings + 1;
+
+   mindex = random() % NUM_FD;
+   batch = mbatch[mindex];
+
+   if (ring == I915_EXEC_RENDER) {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+   OUT_BATCH(0x); /* compare dword */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP);
+   ADVANCE_BATCH();
+   } else {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_FLUSH_DW | 1);
+   OUT_BATCH(0); /* reserved */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP | (122) | (0xf));
+   ADVANCE_BATCH();
+   }
+   intel_batchbuffer_flush_on_ring(batch, ring);
+
+   drm_intel_bo_map(target_buffer, 0);
+   // map to force waiting on rendering
+   drm_intel_bo_unmap(target_buffer);
+   }
+
+   batch = saved_batch;
+}
+
 int fd;
 int devid;
 int num_rings;
@@ -133,6 +184,7 @@ igt_main
igt_skip_on_simulation();
 
igt_fixture {
+   int i;
fd = drm_open_any();
devid = intel_get_drm_devid(fd);
num_rings = gem_get_num_rings(fd);
@@ -148,6 +200,40 @@ igt_main
 
target_buffer = drm_intel_bo_alloc(bufmgr, target bo, 4096, 
4096);
igt_assert(target_buffer);
+
+   /* Create multi drm_fd and map one gem object to multi 
gem_contexts */
+   {
+   unsigned int target_flink;
+   char buffer_name[32];
+   if (dri_bo_flink(target_buffer, target_flink)) {
+   printf(fail to get flink for target buffer\n);
+   igt_assert_f(0, fail to create global 
+gem_handle for target buffer\n);
+   }
+   for (i = 0; i  NUM_FD; i++) {
+   sprintf(buffer_name, Target buffer %d\n, i);
+   mfd[i] = drm_open_any();
+   mbufmgr[i] = drm_intel_bufmgr_gem_init(mfd[i], 
4096);
+   igt_assert_f(mbufmgr[i],
+fail to initialize buf manager 
+for drm_fd %d\n,
+mfd[i]);
+   drm_intel_bufmgr_gem_enable_reuse(mbufmgr[i]);
+   mbatch[i] = intel_batchbuffer_alloc(mbufmgr[i], 
devid);
+   igt_assert_f(mbatch[i],
+fail to create batchbuffer 
+for drm_fd %d\n,
+mfd[i]);
+   mbuffer[i] = intel_bo_gem_create_from_name(
+   mbufmgr[i

Re: [Intel-gfx] [PATCH I-g-t V4 0/2] Tests: Add test cases based on multi drm_fd to test sync

2014-04-23 Thread Zhao Yakui
On Wed, 2014-04-23 at 20:02 -0600, Zhao, Yakui wrote:

It seems that patch 01 was filtered out.
So I will resend it.

Thanks.
Yakui

 This follows Daniel's advice to add the two test cases based on multi drm_fd 
 to 
 test the ring sync and CPU-GPU sync.
 The Broadwell GT3 machine has two independent BSD rings that can be used
 to process the video commands. This is implemented in kernel driver and 
 transparent
 to the user-space. But we still need to check the ring sync and CPU-GPU 
 sync for
 the second BSD ring. Two tests are created based on the multi drm_fds to
 test the sync. Multi drm_fd can assure that the second BSD ring has the 
 opportunity
 to dispatch the GPU command. 
 
 V1-V2: Follow Daniel's comment to add one subtext instead of one individual
 test case, which is used to test the CPU-GPU sync under multi BSD rings/
 
 V2-V3: Follow Imre's comment to remove the unnecessary initialization and
 use igt_assert_f instead of igt_assert
 
 
 Zhao Yakui (2):
   tests: Add one ring sync case based on multi drm_fd to test ring
 semaphore sync under multi BSD rings
   tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to
 test CPU-GPU sync under multi BSD rings
 
  tests/.gitignore|1 +
  tests/Makefile.sources  |1 +
  tests/gem_dummy_reloc_loop.c|  107 +++-
  tests/gem_multi_bsd_sync_loop.c |  175 
 +++
  4 files changed, 283 insertions(+), 1 deletion(-)
  create mode 100644 tests/gem_multi_bsd_sync_loop.c
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH I-g-t V4 0/2] Tests: Add test cases based on multi drm_fd to test sync

2014-04-23 Thread Zhao Yakui
This follows Daniel's advice to add two test cases based on multiple drm_fds
to test the ring sync and the CPU-GPU sync.
The Broadwell GT3 machine has two independent BSD rings that can be used
to process video commands. This is implemented in the kernel driver and is
transparent to user-space. But we still need to check the ring sync and the
CPU-GPU sync for the second BSD ring. Two tests are created based on multiple
drm_fds to test the sync. Using multiple drm_fds ensures that the second BSD
ring has the opportunity to dispatch GPU commands.

V1-V2: Follow Daniel's comment to add one subtest instead of one individual
test case, which is used to test the CPU-GPU sync under multi BSD rings.

V2-V3: Follow Imre's comment to remove the unnecessary initialization and
use igt_assert_f instead of igt_assert

V3-V4: Add gem_multi_bsd_sync_loop.c into the tests/.gitignore

Zhao Yakui (2):
  tests: Add one ring sync case based on multi drm_fd to test ring
semaphore sync under multi BSD rings
  tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to
test CPU-GPU sync under multi BSD rings

 tests/.gitignore|1 +
 tests/Makefile.sources  |1 +
 tests/gem_dummy_reloc_loop.c|  107 +++-
 tests/gem_multi_bsd_sync_loop.c |  175 +++
 4 files changed, 283 insertions(+), 1 deletion(-)
 create mode 100644 tests/gem_multi_bsd_sync_loop.c

-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH I-g-t V4 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings

2014-04-23 Thread Zhao Yakui
The Broadwell GT3 machine has two independent BSD rings in kernel driver while
it is transparent to the user-space driver. In such case it needs to check
the ring sync between the two BSD rings. At the same time it also needs to
check the sync among the second BSD ring and the other rings.

V2-V3: Follow Imre's comment to remove the unnecessary initialization and
use igt_assert_f instead of igt_assert.

V3-V4: Add gem_multi_bsd_sync_loop.c into the tests/.gitignore

Reviewed-by: Imre Deak imre.d...@intel.com
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 tests/.gitignore|1 +
 tests/Makefile.sources  |1 +
 tests/gem_multi_bsd_sync_loop.c |  175 +++
 3 files changed, 177 insertions(+)
 create mode 100644 tests/gem_multi_bsd_sync_loop.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 146bab0..42690dd 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -54,6 +54,7 @@ gem_media_fill
 gem_mmap
 gem_mmap_gtt
 gem_mmap_offset_exhaustion
+gem_multi_bsd_sync_loop
 gem_non_secure_batch
 gem_partial_pwrite_pread
 gem_persistent_relocs
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index c957ace..7cd9ca8 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -105,6 +105,7 @@ TESTS_progs = \
gem_render_tiled_blits \
gem_ring_sync_copy \
gem_ring_sync_loop \
+   gem_multi_bsd_sync_loop \
gem_seqno_wrap \
gem_set_tiling_vs_gtt \
gem_set_tiling_vs_pwrite \
diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c
new file mode 100644
index 000..b01764a
--- /dev/null
+++ b/tests/gem_multi_bsd_sync_loop.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_ring_sync_loop_*.c)
+ *Zhao Yakui yakui.z...@intel.com
+ *
+ */
+
+#include stdlib.h
+#include stdio.h
+#include string.h
+#include fcntl.h
+#include inttypes.h
+#include errno.h
+#include sys/stat.h
+#include sys/time.h
+#include drm.h
+#include ioctl_wrappers.h
+#include drmtest.h
+#include intel_bufmgr.h
+#include intel_batchbuffer.h
+#include intel_io.h
+#include i830_reg.h
+#include intel_chipset.h
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static drm_intel_bo *target_buffer;
+
+#define NUM_FD 50
+
+static int mfd[NUM_FD];
+static drm_intel_bufmgr *mbufmgr[NUM_FD];
+static struct intel_batchbuffer *mbatch[NUM_FD];
+static drm_intel_bo *mbuffer[NUM_FD];
+
+
+/*
+ * Testcase: Basic check of ring-ring sync using a dummy reloc
+ *
+ * Extremely efficient at catching missed irqs with semaphores=0 ...
+ */
+
+#define MI_COND_BATCH_BUFFER_END   (0x3623 | 1)
+#define MI_DO_COMPARE  (121)
+
+static void
+store_dword_loop(int fd)
+{
+   int i;
+   int num_rings = gem_get_num_rings(fd);
+
+   srandom(0xdeadbeef);
+
+   for (i = 0; i  SLOW_QUICK(0x10, 10); i++) {
+   int ring, mindex;
+   ring = random() % num_rings + 1;
+   mindex = random() % NUM_FD;
+   batch = mbatch[mindex];
+   if (ring == I915_EXEC_RENDER) {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+   OUT_BATCH(0x); /* compare dword */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP);
+   ADVANCE_BATCH();
+   } else {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_FLUSH_DW | 1);
+   OUT_BATCH(0); /* reserved */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER

Re: [Intel-gfx] [PATCH I-g-t V2 2/2] tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings

2014-04-22 Thread Zhao Yakui
On Tue, 2014-04-22 at 13:48 -0600, Daniel Vetter wrote:
 On Tue, Apr 22, 2014 at 03:05:03PM +0300, Imre Deak wrote:
  On Tue, 2014-04-15 at 10:38 +0800, Zhao Yakui wrote:
   The Broadwell GT3 machine has two independent BSD rings in kernel driver 
   while
   it is transparent to the user-space driver. In such case it needs to check
   the CPU-GPU sync for the second BSD ring.
   
   V1-V2: Follow Daniel's comment to add one subtext instead of one 
   individual
   test case, which is used to test the CPU-GPU sync under multi BSD rings
   
   Signed-off-by: Zhao Yakui yakui.z...@intel.com
   ---
tests/gem_dummy_reloc_loop.c |  102 
   +-
1 file changed, 101 insertions(+), 1 deletion(-)
   
   diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c
   index a61b59b..660d8e1 100644
   --- a/tests/gem_dummy_reloc_loop.c
   +++ b/tests/gem_dummy_reloc_loop.c
   @@ -48,6 +48,13 @@ static drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
static drm_intel_bo *target_buffer;

   +#define NUM_FD   50
   +
   +static int mfd[NUM_FD];
   +static drm_intel_bufmgr *mbufmgr[NUM_FD];
   +static struct intel_batchbuffer *mbatch[NUM_FD];
   +static drm_intel_bo *mbuffer[NUM_FD];
   +
/*
 * Testcase: Basic check of ring-cpu sync using a dummy reloc
 *
   @@ -124,6 +131,50 @@ dummy_reloc_loop_random_ring(int num_rings)
 }
}

   +static void
   +dummy_reloc_loop_random_ring_multi_fd(int num_rings)
   +{
   + int i;
   + struct intel_batchbuffer *saved_batch;
   +
   + saved_batch = batch;
   +
   + srandom(0xdeadbeef);
   +
   + for (i = 0; i  0x10; i++) {
   + int mindex;
   + int ring = random() % num_rings + 1;
   +
   + mindex = random() % NUM_FD;
   + batch = mbatch[mindex];
   +
   + if (ring == I915_EXEC_RENDER) {
   + BEGIN_BATCH(4);
   + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
   + OUT_BATCH(0x); /* compare dword */
   + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
   + I915_GEM_DOMAIN_RENDER, 0);
   + OUT_BATCH(MI_NOOP);
   + ADVANCE_BATCH();
   + } else {
   + BEGIN_BATCH(4);
   + OUT_BATCH(MI_FLUSH_DW | 1);
   + OUT_BATCH(0); /* reserved */
   + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
   + I915_GEM_DOMAIN_RENDER, 0);
   + OUT_BATCH(MI_NOOP | (122) | (0xf));
   + ADVANCE_BATCH();
   + }
   + intel_batchbuffer_flush_on_ring(batch, ring);
   +
   + drm_intel_bo_map(target_buffer, 0);
   + // map to force waiting on rendering
   + drm_intel_bo_unmap(target_buffer);
   + }
   +
   + batch = saved_batch;
   +}
   +
int fd;
int devid;
int num_rings;
   @@ -133,6 +184,7 @@ igt_main
 igt_skip_on_simulation();

 igt_fixture {
   + int i;
 fd = drm_open_any();
 devid = intel_get_drm_devid(fd);
 num_rings = gem_get_num_rings(fd);
   @@ -148,6 +200,35 @@ igt_main

 target_buffer = drm_intel_bo_alloc(bufmgr, target bo, 4096, 
   4096);
 igt_assert(target_buffer);
   +
   + /* Create multi drm_fd and map one gem object to multi 
   gem_contexts */
   + {
   + unsigned int target_flink;
   + char buffer_name[32];
   + if (dri_bo_flink(target_buffer, target_flink)) {
   + printf(fail to get flink for target buffer\n);
   + igt_assert(0);
  
  For the future: could be just igt_assert_f().
 
 Yeah I think for new testcases we should try to use the latest igt_*
 macros and helpers as much as possible. Reducing control flow and
 replacing it by the right igt_assert/require/... macro imo really helps
 the readability of testcases.

Hi, Daniel/Imre

Thanks for your comments and advice.
I will update it.

Thanks.
Yakui

 -Daniel
  
   + }
   + for (i = 0; i  NUM_FD; i++) {
   + mfd[i] = 0;
   + mbufmgr[i] = NULL;
   + mbuffer[i] = NULL;
   + }
  
  Nitpick: the above are all statics, so no need to init them.
  
  Other than the above this looks good:
  Reviewed-by: Imre Deak imre.d...@intel.com
  
   + for (i = 0; i  NUM_FD; i++) {
   + sprintf(buffer_name, Target buffer %d\n, i);
   + mfd[i] = drm_open_any();
   + mbufmgr[i] = drm_intel_bufmgr_gem_init(mfd[i], 
   4096);
   + igt_assert(mbufmgr[i]);
   + drm_intel_bufmgr_gem_enable_reuse(mbufmgr[i

Re: [Intel-gfx] [PATCH I-g-t V2 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings

2014-04-22 Thread Zhao Yakui
On Tue, 2014-04-22 at 13:44 -0600, Daniel Vetter wrote:
 On Tue, Apr 22, 2014 at 02:52:04PM +0300, Imre Deak wrote:
  On Tue, 2014-04-15 at 10:38 +0800, Zhao Yakui wrote:
   The Broadwell GT3 machine has two independent BSD rings in kernel driver 
   while
   it is transparent to the user-space driver. In such case it needs to check
   the ring sync between the two BSD rings. At the same time it also needs to
   check the sync among the second BSD ring and the other rings.
   
   Signed-off-by: Zhao Yakui yakui.z...@intel.com
   ---
tests/Makefile.sources  |1 +
tests/gem_multi_bsd_sync_loop.c |  172 
   +++
2 files changed, 173 insertions(+)
create mode 100644 tests/gem_multi_bsd_sync_loop.c
   
   diff --git a/tests/Makefile.sources b/tests/Makefile.sources
   index c957ace..7cd9ca8 100644
   --- a/tests/Makefile.sources
   +++ b/tests/Makefile.sources
   @@ -105,6 +105,7 @@ TESTS_progs = \
 gem_render_tiled_blits \
 gem_ring_sync_copy \
 gem_ring_sync_loop \
   + gem_multi_bsd_sync_loop \
 gem_seqno_wrap \
 gem_set_tiling_vs_gtt \
 gem_set_tiling_vs_pwrite \
   diff --git a/tests/gem_multi_bsd_sync_loop.c 
   b/tests/gem_multi_bsd_sync_loop.c
   new file mode 100644
   index 000..7f5b832
   --- /dev/null
   +++ b/tests/gem_multi_bsd_sync_loop.c
   @@ -0,0 +1,172 @@
   +/*
   + * Copyright © 2014 Intel Corporation
   + *
   + * Permission is hereby granted, free of charge, to any person obtaining 
   a
   + * copy of this software and associated documentation files (the 
   Software),
   + * to deal in the Software without restriction, including without 
   limitation
   + * the rights to use, copy, modify, merge, publish, distribute, 
   sublicense,
   + * and/or sell copies of the Software, and to permit persons to whom the
   + * Software is furnished to do so, subject to the following conditions:
   + *
   + * The above copyright notice and this permission notice (including the 
   next
   + * paragraph) shall be included in all copies or substantial portions of 
   the
   + * Software.
   + *
   + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, 
   EXPRESS OR
   + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
   MERCHANTABILITY,
   + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 
   SHALL
   + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
   OTHER
   + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
   ARISING
   + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
   DEALINGS
   + * IN THE SOFTWARE.
   + *
   + * Authors:
   + *Daniel Vetter daniel.vet...@ffwll.ch (based on 
   gem_ring_sync_loop_*.c)
   + *Zhao Yakui yakui.z...@intel.com
   + *
   + */
   +
   +#include stdlib.h
   +#include stdio.h
   +#include string.h
   +#include fcntl.h
   +#include inttypes.h
   +#include errno.h
   +#include sys/stat.h
   +#include sys/time.h
   +#include drm.h
   +#include ioctl_wrappers.h
   +#include drmtest.h
   +#include intel_bufmgr.h
   +#include intel_batchbuffer.h
   +#include intel_io.h
   +#include i830_reg.h
   +#include intel_chipset.h
   +
   +static drm_intel_bufmgr *bufmgr;
   +struct intel_batchbuffer *batch;
   +static drm_intel_bo *target_buffer;
   +
   +#define NUM_FD   50
   +
   +static int mfd[NUM_FD];
   +static drm_intel_bufmgr *mbufmgr[NUM_FD];
   +static struct intel_batchbuffer *mbatch[NUM_FD];
   +static drm_intel_bo *mbuffer[NUM_FD];
   +
   +
   +/*
   + * Testcase: Basic check of ring-ring sync using a dummy reloc
   + *
   + * Extremely efficient at catching missed irqs with semaphores=0 ...
   + */
   +
   +#define MI_COND_BATCH_BUFFER_END (0x3623 | 1)
   +#define MI_DO_COMPARE(121)
   +
   +static void
   +store_dword_loop(int fd)
   +{
   + int i;
   + int num_rings = gem_get_num_rings(fd);
   +
   + srandom(0xdeadbeef);
   +
   + for (i = 0; i  SLOW_QUICK(0x10, 10); i++) {
   + int ring, mindex;
   + ring = random() % num_rings + 1;
   + mindex = random() % NUM_FD;
   + batch = mbatch[mindex];
   + if (ring == I915_EXEC_RENDER) {
   + BEGIN_BATCH(4);
   + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
   + OUT_BATCH(0x); /* compare dword */
   + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
   + I915_GEM_DOMAIN_RENDER, 0);
   + OUT_BATCH(MI_NOOP);
   + ADVANCE_BATCH();
   + } else {
   + BEGIN_BATCH(4);
   + OUT_BATCH(MI_FLUSH_DW | 1);
   + OUT_BATCH(0); /* reserved */
   + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
   + I915_GEM_DOMAIN_RENDER, 0);
   + OUT_BATCH(MI_NOOP | (122) | (0xf));
   + ADVANCE_BATCH

[Intel-gfx] [PATCH I-g-t V3 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings

2014-04-22 Thread Zhao Yakui
The Broadwell GT3 machine has two independent BSD rings in kernel driver while
it is transparent to the user-space driver. In such case it needs to check
the ring sync between the two BSD rings. At the same time it also needs to
check the sync among the second BSD ring and the other rings.

V2-V3: Follow Imre's comment to remove the unnecessary initialization and
use igt_assert_f instead of igt_assert.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 tests/Makefile.sources  |1 +
 tests/gem_multi_bsd_sync_loop.c |  175 +++
 2 files changed, 176 insertions(+)
 create mode 100644 tests/gem_multi_bsd_sync_loop.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index c957ace..7cd9ca8 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -105,6 +105,7 @@ TESTS_progs = \
gem_render_tiled_blits \
gem_ring_sync_copy \
gem_ring_sync_loop \
+   gem_multi_bsd_sync_loop \
gem_seqno_wrap \
gem_set_tiling_vs_gtt \
gem_set_tiling_vs_pwrite \
diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c
new file mode 100644
index 000..b01764a
--- /dev/null
+++ b/tests/gem_multi_bsd_sync_loop.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_ring_sync_loop_*.c)
+ *Zhao Yakui yakui.z...@intel.com
+ *
+ */
+
+#include stdlib.h
+#include stdio.h
+#include string.h
+#include fcntl.h
+#include inttypes.h
+#include errno.h
+#include sys/stat.h
+#include sys/time.h
+#include drm.h
+#include ioctl_wrappers.h
+#include drmtest.h
+#include intel_bufmgr.h
+#include intel_batchbuffer.h
+#include intel_io.h
+#include i830_reg.h
+#include intel_chipset.h
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static drm_intel_bo *target_buffer;
+
+#define NUM_FD 50
+
+static int mfd[NUM_FD];
+static drm_intel_bufmgr *mbufmgr[NUM_FD];
+static struct intel_batchbuffer *mbatch[NUM_FD];
+static drm_intel_bo *mbuffer[NUM_FD];
+
+
+/*
+ * Testcase: Basic check of ring-ring sync using a dummy reloc
+ *
+ * Extremely efficient at catching missed irqs with semaphores=0 ...
+ */
+
+#define MI_COND_BATCH_BUFFER_END   (0x3623 | 1)
+#define MI_DO_COMPARE  (121)
+
+static void
+store_dword_loop(int fd)
+{
+   int i;
+   int num_rings = gem_get_num_rings(fd);
+
+   srandom(0xdeadbeef);
+
+   for (i = 0; i  SLOW_QUICK(0x10, 10); i++) {
+   int ring, mindex;
+   ring = random() % num_rings + 1;
+   mindex = random() % NUM_FD;
+   batch = mbatch[mindex];
+   if (ring == I915_EXEC_RENDER) {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+   OUT_BATCH(0x); /* compare dword */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP);
+   ADVANCE_BATCH();
+   } else {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_FLUSH_DW | 1);
+   OUT_BATCH(0); /* reserved */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP | (122) | (0xf));
+   ADVANCE_BATCH();
+   }
+   intel_batchbuffer_flush_on_ring(batch, ring);
+   }
+
+   drm_intel_bo_map(target_buffer, 0);
+   // map to force waiting on rendering
+   drm_intel_bo_unmap(target_buffer);
+}
+
+igt_simple_main
+{
+   int fd;
+   int devid;
+   int i

[Intel-gfx] [PATCH I-g-t V3 0/2] Tests: Add test cases based on multi drm_fd to test sync

2014-04-22 Thread Zhao Yakui
This follows Daniel's advice to add two test cases based on multiple drm_fds
to test the ring sync and the CPU-GPU sync.
The Broadwell GT3 machine has two independent BSD rings that can be used
to process video commands. This is implemented in the kernel driver and is
transparent to user-space. But we still need to check the ring sync and the
CPU-GPU sync for the second BSD ring. Two tests are created based on multiple
drm_fds to test the sync. Using multiple drm_fds ensures that the second BSD
ring has the opportunity to dispatch GPU commands.

V1-V2: Follow Daniel's comment to add one subtest instead of one individual
test case, which is used to test the CPU-GPU sync under multi BSD rings.

V2-V3: Follow Imre's comment to remove the unnecessary initialization and
use igt_assert_f instead of igt_assert

Zhao Yakui (2):
  tests: Add one ring sync case based on multi drm_fd to test ring
semaphore sync under multi BSD rings
  tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to
test CPU-GPU sync under multi BSD rings

 tests/Makefile.sources  |1 +
 tests/gem_dummy_reloc_loop.c|  107 +++-
 tests/gem_multi_bsd_sync_loop.c |  175 +++
 3 files changed, 282 insertions(+), 1 deletion(-)
 create mode 100644 tests/gem_multi_bsd_sync_loop.c

-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH I-g-t V3 2/2] tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings

2014-04-22 Thread Zhao Yakui
The Broadwell GT3 machine has two independent BSD rings in kernel driver while
it is transparent to the user-space driver. In such case it needs to check
the CPU-GPU sync for the second BSD ring.

V1-V2: Follow Daniel's comment to add one subtest instead of one individual
test case, which is used to test the CPU-GPU sync under multi BSD rings.

V2-V3: Follow Imre's comment to remove the unnecessary initialization and
use igt_assert_f instead of igt_assert

Reviewed-by: Imre Deak imre.d...@intel.com
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 tests/gem_dummy_reloc_loop.c |  107 +-
 1 file changed, 106 insertions(+), 1 deletion(-)

diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c
index a61b59b..4e4dd49 100644
--- a/tests/gem_dummy_reloc_loop.c
+++ b/tests/gem_dummy_reloc_loop.c
@@ -48,6 +48,13 @@ static drm_intel_bufmgr *bufmgr;
 struct intel_batchbuffer *batch;
 static drm_intel_bo *target_buffer;
 
+#define NUM_FD 50
+
+static int mfd[NUM_FD];
+static drm_intel_bufmgr *mbufmgr[NUM_FD];
+static struct intel_batchbuffer *mbatch[NUM_FD];
+static drm_intel_bo *mbuffer[NUM_FD];
+
 /*
  * Testcase: Basic check of ring-cpu sync using a dummy reloc
  *
@@ -124,6 +131,50 @@ dummy_reloc_loop_random_ring(int num_rings)
}
 }
 
+static void
+dummy_reloc_loop_random_ring_multi_fd(int num_rings)
+{
+   int i;
+   struct intel_batchbuffer *saved_batch;
+
+   saved_batch = batch;
+
+   srandom(0xdeadbeef);
+
+   for (i = 0; i  0x10; i++) {
+   int mindex;
+   int ring = random() % num_rings + 1;
+
+   mindex = random() % NUM_FD;
+   batch = mbatch[mindex];
+
+   if (ring == I915_EXEC_RENDER) {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+   OUT_BATCH(0x); /* compare dword */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP);
+   ADVANCE_BATCH();
+   } else {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_FLUSH_DW | 1);
+   OUT_BATCH(0); /* reserved */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP | (122) | (0xf));
+   ADVANCE_BATCH();
+   }
+   intel_batchbuffer_flush_on_ring(batch, ring);
+
+   drm_intel_bo_map(target_buffer, 0);
+   // map to force waiting on rendering
+   drm_intel_bo_unmap(target_buffer);
+   }
+
+   batch = saved_batch;
+}
+
 int fd;
 int devid;
 int num_rings;
@@ -133,6 +184,7 @@ igt_main
igt_skip_on_simulation();
 
igt_fixture {
+   int i;
fd = drm_open_any();
devid = intel_get_drm_devid(fd);
num_rings = gem_get_num_rings(fd);
@@ -148,6 +200,40 @@ igt_main
 
target_buffer = drm_intel_bo_alloc(bufmgr, target bo, 4096, 
4096);
igt_assert(target_buffer);
+
+   /* Create multi drm_fd and map one gem object to multi 
gem_contexts */
+   {
+   unsigned int target_flink;
+   char buffer_name[32];
+   if (dri_bo_flink(target_buffer, target_flink)) {
+   printf(fail to get flink for target buffer\n);
+   igt_assert_f(0, fail to create global 
+gem_handle for target buffer\n);
+   }
+   for (i = 0; i  NUM_FD; i++) {
+   sprintf(buffer_name, Target buffer %d\n, i);
+   mfd[i] = drm_open_any();
+   mbufmgr[i] = drm_intel_bufmgr_gem_init(mfd[i], 
4096);
+   igt_assert_f(mbufmgr[i],
+fail to initialize buf manager 
+for drm_fd %d\n,
+mfd[i]);
+   drm_intel_bufmgr_gem_enable_reuse(mbufmgr[i]);
+   mbatch[i] = intel_batchbuffer_alloc(mbufmgr[i], 
devid);
+   igt_assert_f(mbatch[i],
+fail to create batchbuffer 
+for drm_fd %d\n,
+mfd[i]);
+   mbuffer[i] = intel_bo_gem_create_from_name(
+   mbufmgr[i

Re: [Intel-gfx] [PATCH V3 2/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine

2014-04-16 Thread Zhao Yakui
On Wed, 2014-04-16 at 10:23 -0600, Deak, Imre wrote:
 On Wed, 2014-04-16 at 10:41 +0800, Zhao Yakui wrote:
  Based on the hardware spec, the BDW GT3 machine has two independent
  BSD ring that can be used to dispatch the video commands.
  So just initialize it.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
 
 A couple of nitpicks below, with or without those:
 Reviewed-by: Imre Deak imre.d...@intel.com

Hi, Imre

Thanks for your review and the comments.

I will update the patch based on your comment. 
   
 
  ---
   drivers/gpu/drm/i915/i915_drv.c |4 +--
   drivers/gpu/drm/i915/i915_drv.h |2 ++
   drivers/gpu/drm/i915/i915_gem.c |9 +-
   drivers/gpu/drm/i915/i915_gpu_error.c   |1 +
   drivers/gpu/drm/i915/i915_reg.h |1 +
   drivers/gpu/drm/i915/intel_ringbuffer.c |   54 
  +++
   drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++-
   7 files changed, 71 insertions(+), 4 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_drv.c 
  b/drivers/gpu/drm/i915/i915_drv.c
  index 17fbbe5..2a7842b 100644
  --- a/drivers/gpu/drm/i915/i915_drv.c
  +++ b/drivers/gpu/drm/i915/i915_drv.c
  @@ -282,7 +282,7 @@ static const struct intel_device_info 
  intel_broadwell_m_info = {
   static const struct intel_device_info intel_broadwell_gt3d_info = {
  .gen = 8, .num_pipes = 3,
  .need_gfx_hws = 1, .has_hotplug = 1,
  -   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
  +   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
  .has_llc = 1,
  .has_ddi = 1,
  .has_fbc = 1,
  @@ -292,7 +292,7 @@ static const struct intel_device_info 
  intel_broadwell_gt3d_info = {
   static const struct intel_device_info intel_broadwell_gt3m_info = {
  .gen = 8, .is_mobile = 1, .num_pipes = 3,
  .need_gfx_hws = 1, .has_hotplug = 1,
  -   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
  +   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
  .has_llc = 1,
  .has_ddi = 1,
  .has_fbc = 1,
  diff --git a/drivers/gpu/drm/i915/i915_drv.h 
  b/drivers/gpu/drm/i915/i915_drv.h
  index 92c3095..74aef6a 100644
  --- a/drivers/gpu/drm/i915/i915_drv.h
  +++ b/drivers/gpu/drm/i915/i915_drv.h
  @@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table {
   #define BSD_RING   (1VCS)
   #define BLT_RING   (1BCS)
   #define VEBOX_RING (1VECS)
  +#define BSD2_RING  (1VCS2)
   #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask  BSD_RING)
  +#define HAS_BSD2(dev)  (INTEL_INFO(dev)-ring_mask  BSD2_RING)
   #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask  BLT_RING)
   #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask  VEBOX_RING)
   #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc)
  diff --git a/drivers/gpu/drm/i915/i915_gem.c 
  b/drivers/gpu/drm/i915/i915_gem.c
  index 85c9cf0..b4dcf2a 100644
  --- a/drivers/gpu/drm/i915/i915_gem.c
  +++ b/drivers/gpu/drm/i915/i915_gem.c
  @@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device 
  *dev)
  goto cleanup_blt_ring;
  }
   
  +   if (HAS_BSD2(dev)) {
  +   ret = intel_init_bsd2_ring_buffer(dev);
  +   if (ret)
  +   goto cleanup_vebox_ring;
  +   }
   
  ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
  if (ret)
  -   goto cleanup_vebox_ring;
  +   goto cleanup_ring;
 
 maybe cleanup_bsd2_ring?  
 
   
  return 0;
   
  +cleanup_ring:
  +   intel_cleanup_ring_buffer(dev_priv-ring[VCS2]);
   cleanup_vebox_ring:
  intel_cleanup_ring_buffer(dev_priv-ring[VECS]);
   cleanup_blt_ring:
  diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
  b/drivers/gpu/drm/i915/i915_gpu_error.c
  index 4865ade..3cab7f9 100644
  --- a/drivers/gpu/drm/i915/i915_gpu_error.c
  +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
  @@ -42,6 +42,7 @@ static const char *ring_str(int ring)
  case VCS: return bsd;
  case BCS: return blt;
  case VECS: return vebox;
  +   case VCS2: return second bsd;
 
 bsd2 would be more concise
 

OK. I will update it.

  default: return ;
  }
   }
  diff --git a/drivers/gpu/drm/i915/i915_reg.h 
  b/drivers/gpu/drm/i915/i915_reg.h
  index 8f84555..0b88508 100644
  --- a/drivers/gpu/drm/i915/i915_reg.h
  +++ b/drivers/gpu/drm/i915/i915_reg.h
  @@ -760,6 +760,7 @@ enum punit_power_well {
   #define RENDER_RING_BASE   0x02000
   #define BSD_RING_BASE  0x04000
   #define GEN6_BSD_RING_BASE 0x12000
  +#define GEN8_BSD2_RING_BASE0x1c000
   #define VEBOX_RING_BASE0x1a000
   #define BLT_RING_BASE  0x22000
   #define RING_TAIL(base)((base)+0x30)
  diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
  b/drivers/gpu/drm/i915/intel_ringbuffer.c
  index eb3dd26..8b9b89080 100644
  --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
  +++ b/drivers/gpu/drm/i915

[Intel-gfx] [PATCH V4 2/6] drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space

2014-04-16 Thread Zhao Yakui
Signed-off-by: Zhao Yakui yakui.z...@intel.com
Reviewed-by: Imre Deak imre.d...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h|1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3491402..341ec68 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1035,7 +1035,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (args-flags  I915_EXEC_IS_PINNED)
flags |= I915_DISPATCH_PINNED;
 
-   if ((args-flags  I915_EXEC_RING_MASK)  I915_NUM_RINGS) {
+   if ((args-flags  I915_EXEC_RING_MASK)  LAST_USER_RING) {
DRM_DEBUG(execbuf with unknown ring: %d\n,
  (int)(args-flags  I915_EXEC_RING_MASK));
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 413cdc7..ec9d978 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -63,6 +63,7 @@ struct  intel_ring_buffer {
VECS,
} id;
 #define I915_NUM_RINGS 4
+#define LAST_USER_RING (VECS + 1)
u32 mmio_base;
void__iomem *virtual_start;
struct  drm_device *dev;
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V4 0/6] drm/i915: Add the support of dual BSD rings on BDW GT3

2014-04-16 Thread Zhao Yakui
This patch set adds support for dual BSD rings on BDW GT3. According to the
hardware spec, the BDW GT3 has two independent BSD rings, which can be used to
process video commands. To keep things simple, this is transparent to the
user-space driver/middleware: the kernel driver decides which ring is used to
dispatch the BSD video commands.

As every BSD ring is powerful enough on its own, it is sufficient to dispatch
the BSD video commands based on the drm fd. In that case different BSD rings
can be used for video playback and encoding.

V1->V2: Follow Daniel's comment to do the following updates:
   a. consider the stolen check for BDW in kernel/early-quirks.c in patch 01
   b. update the comment in Patch 04
   c. use the simple ping-pong mechanism to add the support of dual BSD rings.
Further optimization will be considered in another patch set.

V2->V3: Follow Daniel's comment to use struct_mutex instead of an atomic_t
when determining which ring is used to dispatch the video commands.

V3->V4: Follow Imre's comment to adjust the patch order and do some minor
updates. For example: add some comments to describe the semaphores in Patch 03
and update the ring name for the second BSD ring.

Zhao Yakui (6):
  drm/i915: Split the BDW device definition to prepare for dual BSD
rings on BDW GT3
  drm/i915: Update the restrict check to filter out wrong Ring ID
passed by user-space
  drm/i915:Initialize the second BSD ring on BDW GT3 machine
  drm/i915:Handle the irq interrupt for the second BSD ring
  drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7
to remove the switch check warning
  drm/i915: Use the coarse ping-pong mechanism based on drm fd to
dispatch the BSD command on BDW GT3

 drivers/gpu/drm/i915/i915_dma.c|3 +
 drivers/gpu/drm/i915/i915_drv.c|   26 -
 drivers/gpu/drm/i915/i915_drv.h|5 ++
 drivers/gpu/drm/i915/i915_gem.c|9 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   42 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |1 +
 drivers/gpu/drm/i915/i915_irq.c|5 +-
 drivers/gpu/drm/i915/i915_reg.h|1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c|   83 
 drivers/gpu/drm/i915/intel_ringbuffer.h|5 +-
 include/drm/i915_pciids.h  |   22 ++--
 11 files changed, 190 insertions(+), 12 deletions(-)

-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V4 4/6] drm/i915:Handle the irq interrupt for the second BSD ring

2014-04-16 Thread Zhao Yakui
Reviewed-by: Imre Deak imre.d...@intel.com
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_irq.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7a4d3ae..63bd5de 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1347,13 +1347,16 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
DRM_ERROR(The master control interrupt lied (GT0)!\n);
}
 
-   if (master_ctl  GEN8_GT_VCS1_IRQ) {
+   if (master_ctl  (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
tmp = I915_READ(GEN8_GT_IIR(1));
if (tmp) {
ret = IRQ_HANDLED;
vcs = tmp  GEN8_VCS1_IRQ_SHIFT;
if (vcs  GT_RENDER_USER_INTERRUPT)
notify_ring(dev, dev_priv-ring[VCS]);
+   vcs = tmp  GEN8_VCS2_IRQ_SHIFT;
+   if (vcs  GT_RENDER_USER_INTERRUPT)
+   notify_ring(dev, dev_priv-ring[VCS2]);
I915_WRITE(GEN8_GT_IIR(1), tmp);
} else
DRM_ERROR(The master control interrupt lied (GT1)!\n);
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V4 6/6] drm/i915: Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-16 Thread Zhao Yakui
The BDW GT3 has two independent BSD rings, which can be used to process video
commands. To keep things simple, this is transparent to the user-space
driver/middleware; instead, the kernel driver decides which ring is used to
dispatch the BSD video commands.

As every BSD ring is powerful enough on its own, it is sufficient to dispatch
the BSD video commands based on the drm fd. This way one video stream can be
played back while another video stream is being encoded. A coarse ping-pong
mechanism determines which BSD ring is used to dispatch the BSD video commands.

V1->V2: Follow Daniel's comment and use the simple ping-pong mechanism.
This only adds support for dual BSD rings on BDW GT3 machines.
Further optimization will be considered in another patch set.

V2->V3: Follow Daniel's comment to use struct_mutex instead of an atomic_t
when determining which ring is used to dispatch the video commands.

Reviewed-by: Imre Deak imre.d...@intel.com
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c|3 +++
 drivers/gpu/drm/i915/i915_drv.h|3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   40 +++-
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0b38f88..f7558f5 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
long flags)
spin_lock_init(dev_priv-backlight_lock);
spin_lock_init(dev_priv-uncore.lock);
spin_lock_init(dev_priv-mm.object_stat_lock);
+   dev_priv-ring_index = 0;
mutex_init(dev_priv-dpio_lock);
mutex_init(dev_priv-modeset_restore_lock);
 
@@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct 
drm_file *file)
 {
struct drm_i915_file_private *file_priv = file-driver_priv;
 
+   if (file_priv  file_priv-bsd_ring)
+   file_priv-bsd_ring = NULL;
kfree(file_priv);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74aef6a..032f992 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1472,6 +1472,8 @@ struct drm_i915_private {
struct i915_dri1_state dri1;
/* Old ums support infrastructure, same warning applies. */
struct i915_ums_state ums;
+   /* the indicator for dispatch video commands on two BSD rings */
+   int ring_index;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -1679,6 +1681,7 @@ struct drm_i915_file_private {
 
struct i915_hw_context *private_default_ctx;
atomic_t rps_wait_boost;
+   struct  intel_ring_buffer *bsd_ring;
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 341ec68..1dc6f03 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -999,6 +999,37 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
 }
 
+/**
+ * Find one BSD ring to dispatch the corresponding BSD command.
+ * The Ring ID is returned.
+ */
+static int gen8_dispatch_bsd_ring(struct drm_device *dev,
+ struct drm_file *file)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_i915_file_private *file_priv = file-driver_priv;
+
+   /* Check whether the file_priv is using one ring */
+   if (file_priv-bsd_ring)
+   return file_priv-bsd_ring-id;
+   else {
+   /* If no, use the ping-pong mechanism to select one ring */
+   int ring_id;
+
+   mutex_lock(dev-struct_mutex);
+   if (dev_priv-ring_index == 0) {
+   ring_id = VCS;
+   dev_priv-ring_index = 1;
+   } else {
+   ring_id = VCS2;
+   dev_priv-ring_index = 0;
+   }
+   file_priv-bsd_ring = dev_priv-ring[ring_id];
+   mutex_unlock(dev-struct_mutex);
+   return ring_id;
+   }
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
   struct drm_file *file,
@@ -1043,7 +1074,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
 
if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
ring = dev_priv-ring[RCS];
-   else
+   else if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
+   if (HAS_BSD2(dev)) {
+   int ring_id;
+   ring_id = gen8_dispatch_bsd_ring(dev, file);
+   ring = dev_priv-ring[ring_id];
+   } else
+   ring = dev_priv-ring[VCS];
+   } else
ring = dev_priv-ring[(args-flags  I915_EXEC_RING_MASK) - 1

[Intel-gfx] [PATCH V4 5/6] drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page

2014-04-16 Thread Zhao Yakui
Gen7 doesn't have the second BSD ring, but gcc emits a switch check warning
during compilation because the VCS2 case is not handled. So just add the case
to silence the switch check warning.

V1->V2: Follow Daniel's comment to update the comment

Reviewed-by: Imre Deak imre.d...@intel.com
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7e64ab6..1c08dbb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -988,6 +988,11 @@ void intel_ring_setup_status_page(struct intel_ring_buffer 
*ring)
case BCS:
mmio = BLT_HWS_PGA_GEN7;
break;
+   /*
+* VCS2 actually doesn't exist on Gen7. Only shut up
+* gcc switch check warning
+*/
+   case VCS2:
case VCS:
mmio = BSD_HWS_PGA_GEN7;
break;
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V4 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3

2014-04-16 Thread Zhao Yakui
Based on the hardware spec, the BDW GT3 has a different configuration from
the BDW GT1/GT2. So split the BDW device info definition.
This prepares for adding the dual BSD rings on BDW GT3 machines.

V1->V2: Follow Daniel's comment to pay attention to the stolen check for BDW
in kernel/early-quirks.c

Reviewed-by: Imre Deak imre.d...@intel.com
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |   26 --
 include/drm/i915_pciids.h   |   22 +-
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5d8250f..17fbbe5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -279,6 +279,26 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
GEN_DEFAULT_PIPEOFFSETS,
 };
 
+static const struct intel_device_info intel_broadwell_gt3d_info = {
+   .gen = 8, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
+static const struct intel_device_info intel_broadwell_gt3m_info = {
+   .gen = 8, .is_mobile = 1, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
@@ -311,8 +331,10 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
INTEL_HSW_M_IDS(intel_haswell_m_info), \
INTEL_VLV_M_IDS(intel_valleyview_m_info),  \
INTEL_VLV_D_IDS(intel_valleyview_d_info),  \
-   INTEL_BDW_M_IDS(intel_broadwell_m_info),   \
-   INTEL_BDW_D_IDS(intel_broadwell_d_info)
+   INTEL_BDW_GT12M_IDS(intel_broadwell_m_info),   \
+   INTEL_BDW_GT12D_IDS(intel_broadwell_d_info),   \
+   INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \
+   INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info)
 
 static const struct pci_device_id pciidlist[] = {  /* aka */
INTEL_PCI_IDS,
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 940ece4..24f3cad 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -223,14 +223,26 @@
_INTEL_BDW_D(gt, 0x160A, info), /* Server */ \
_INTEL_BDW_D(gt, 0x160D, info) /* Workstation */
 
-#define INTEL_BDW_M_IDS(info) \
+#define INTEL_BDW_GT12M_IDS(info) \
_INTEL_BDW_M_IDS(1, info), \
-   _INTEL_BDW_M_IDS(2, info), \
-   _INTEL_BDW_M_IDS(3, info)
+   _INTEL_BDW_M_IDS(2, info)
 
-#define INTEL_BDW_D_IDS(info) \
+#define INTEL_BDW_GT12D_IDS(info) \
_INTEL_BDW_D_IDS(1, info), \
-   _INTEL_BDW_D_IDS(2, info), \
+   _INTEL_BDW_D_IDS(2, info)
+
+#define INTEL_BDW_GT3M_IDS(info) \
+   _INTEL_BDW_M_IDS(3, info)
+
+#define INTEL_BDW_GT3D_IDS(info) \
_INTEL_BDW_D_IDS(3, info)
 
+#define INTEL_BDW_M_IDS(info) \
+   INTEL_BDW_GT12M_IDS(info), \
+   INTEL_BDW_GT3M_IDS(info)
+
+#define INTEL_BDW_D_IDS(info) \
+   INTEL_BDW_GT12D_IDS(info), \
+   INTEL_BDW_GT3D_IDS(info)
+
 #endif /* _I915_PCIIDS_H */
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V4 3/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine

2014-04-16 Thread Zhao Yakui
Based on the hardware spec, the BDW GT3 machine has two independent
BSD rings that can be used to dispatch the video commands.
So just initialize the second one.

V3->V4: Follow Imre's comment to do some minor updates. For example,
more comments are added to describe the semaphores between rings.

Reviewed-by: Imre Deak imre.d...@intel.com
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |4 +-
 drivers/gpu/drm/i915/i915_drv.h |2 +
 drivers/gpu/drm/i915/i915_gem.c |9 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c   |1 +
 drivers/gpu/drm/i915/i915_reg.h |1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   78 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h |4 +-
 7 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 17fbbe5..2a7842b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -282,7 +282,7 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
 static const struct intel_device_info intel_broadwell_gt3d_info = {
.gen = 8, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
@@ -292,7 +292,7 @@ static const struct intel_device_info 
intel_broadwell_gt3d_info = {
 static const struct intel_device_info intel_broadwell_gt3m_info = {
.gen = 8, .is_mobile = 1, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 92c3095..74aef6a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table {
 #define BSD_RING   (1VCS)
 #define BLT_RING   (1BCS)
 #define VEBOX_RING (1VECS)
+#define BSD2_RING  (1VCS2)
 #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask  BSD_RING)
+#define HAS_BSD2(dev)  (INTEL_INFO(dev)-ring_mask  BSD2_RING)
 #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask  BLT_RING)
 #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask  VEBOX_RING)
 #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85c9cf0..65c441c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_blt_ring;
}
 
+   if (HAS_BSD2(dev)) {
+   ret = intel_init_bsd2_ring_buffer(dev);
+   if (ret)
+   goto cleanup_vebox_ring;
+   }
 
ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
if (ret)
-   goto cleanup_vebox_ring;
+   goto cleanup_bsd2_ring;
 
return 0;
 
+cleanup_bsd2_ring:
+   intel_cleanup_ring_buffer(dev_priv-ring[VCS2]);
 cleanup_vebox_ring:
intel_cleanup_ring_buffer(dev_priv-ring[VECS]);
 cleanup_blt_ring:
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4865ade..282164c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -42,6 +42,7 @@ static const char *ring_str(int ring)
case VCS: return bsd;
case BCS: return blt;
case VECS: return vebox;
+   case VCS2: return bsd2;
default: return ;
}
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8f84555..0b88508 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -760,6 +760,7 @@ enum punit_power_well {
 #define RENDER_RING_BASE   0x02000
 #define BSD_RING_BASE  0x04000
 #define GEN6_BSD_RING_BASE 0x12000
+#define GEN8_BSD2_RING_BASE0x1c000
 #define VEBOX_RING_BASE0x1a000
 #define BLT_RING_BASE  0x22000
 #define RING_TAIL(base)((base)+0x30)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eb3dd26..7e64ab6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1920,14 +1920,22 @@ int intel_init_render_ring_buffer(struct drm_device 
*dev)
ring-get_seqno = gen6_ring_get_seqno;
ring-set_seqno = ring_set_seqno;
ring-sync_to = gen6_ring_sync;
+   /*
+* The current semaphore is only applied on pre

[Intel-gfx] [PATCH V3 6/6] drm/i915: Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-15 Thread Zhao Yakui
The BDW GT3 has two independent BSD rings, which can be used to process video
commands. To keep things simple, this is transparent to the user-space
driver/middleware; instead, the kernel driver decides which ring is used to
dispatch the BSD video commands.

As every BSD ring is powerful enough on its own, it is sufficient to dispatch
the BSD video commands based on the drm fd. This way one video stream can be
played back while another video stream is being encoded. A coarse ping-pong
mechanism determines which BSD ring is used to dispatch the BSD video commands.

V1->V2: Follow Daniel's comment and use the simple ping-pong mechanism.
This only adds support for dual BSD rings on BDW GT3 machines.
Further optimization will be considered in another patch set.

V2->V3: Follow Daniel's comment to use struct_mutex instead of an atomic_t
when determining which ring is used to dispatch the video commands.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c|3 +++
 drivers/gpu/drm/i915/i915_drv.h|3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   40 +++-
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0b38f88..f7558f5 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
long flags)
spin_lock_init(dev_priv-backlight_lock);
spin_lock_init(dev_priv-uncore.lock);
spin_lock_init(dev_priv-mm.object_stat_lock);
+   dev_priv-ring_index = 0;
mutex_init(dev_priv-dpio_lock);
mutex_init(dev_priv-modeset_restore_lock);
 
@@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct 
drm_file *file)
 {
struct drm_i915_file_private *file_priv = file-driver_priv;
 
+   if (file_priv  file_priv-bsd_ring)
+   file_priv-bsd_ring = NULL;
kfree(file_priv);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74aef6a..032f992 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1472,6 +1472,8 @@ struct drm_i915_private {
struct i915_dri1_state dri1;
/* Old ums support infrastructure, same warning applies. */
struct i915_ums_state ums;
+   /* the indicator for dispatch video commands on two BSD rings */
+   int ring_index;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -1679,6 +1681,7 @@ struct drm_i915_file_private {
 
struct i915_hw_context *private_default_ctx;
atomic_t rps_wait_boost;
+   struct  intel_ring_buffer *bsd_ring;
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 341ec68..1dc6f03 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -999,6 +999,37 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
 }
 
+/**
+ * Find one BSD ring to dispatch the corresponding BSD command.
+ * The Ring ID is returned.
+ */
+static int gen8_dispatch_bsd_ring(struct drm_device *dev,
+ struct drm_file *file)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_i915_file_private *file_priv = file-driver_priv;
+
+   /* Check whether the file_priv is using one ring */
+   if (file_priv-bsd_ring)
+   return file_priv-bsd_ring-id;
+   else {
+   /* If no, use the ping-pong mechanism to select one ring */
+   int ring_id;
+
+   mutex_lock(dev-struct_mutex);
+   if (dev_priv-ring_index == 0) {
+   ring_id = VCS;
+   dev_priv-ring_index = 1;
+   } else {
+   ring_id = VCS2;
+   dev_priv-ring_index = 0;
+   }
+   file_priv-bsd_ring = dev_priv-ring[ring_id];
+   mutex_unlock(dev-struct_mutex);
+   return ring_id;
+   }
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
   struct drm_file *file,
@@ -1043,7 +1074,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
 
if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
ring = dev_priv-ring[RCS];
-   else
+   else if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
+   if (HAS_BSD2(dev)) {
+   int ring_id;
+   ring_id = gen8_dispatch_bsd_ring(dev, file);
+   ring = dev_priv-ring[ring_id];
+   } else
+   ring = dev_priv-ring[VCS];
+   } else
ring = dev_priv-ring[(args-flags  I915_EXEC_RING_MASK) - 1];
 
if (!intel_ring_initialized(ring)) {
-- 
1.7.10.1

[Intel-gfx] [PATCH V3 2/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine

2014-04-15 Thread Zhao Yakui
Based on the hardware spec, the BDW GT3 machine has two independent
BSD rings that can be used to dispatch the video commands.
So just initialize the second one.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |4 +--
 drivers/gpu/drm/i915/i915_drv.h |2 ++
 drivers/gpu/drm/i915/i915_gem.c |9 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |1 +
 drivers/gpu/drm/i915/i915_reg.h |1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   54 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++-
 7 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 17fbbe5..2a7842b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -282,7 +282,7 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
 static const struct intel_device_info intel_broadwell_gt3d_info = {
.gen = 8, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
@@ -292,7 +292,7 @@ static const struct intel_device_info 
intel_broadwell_gt3d_info = {
 static const struct intel_device_info intel_broadwell_gt3m_info = {
.gen = 8, .is_mobile = 1, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 92c3095..74aef6a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table {
 #define BSD_RING   (1VCS)
 #define BLT_RING   (1BCS)
 #define VEBOX_RING (1VECS)
+#define BSD2_RING  (1VCS2)
 #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask  BSD_RING)
+#define HAS_BSD2(dev)  (INTEL_INFO(dev)-ring_mask  BSD2_RING)
 #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask  BLT_RING)
 #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask  VEBOX_RING)
 #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85c9cf0..b4dcf2a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_blt_ring;
}
 
+   if (HAS_BSD2(dev)) {
+   ret = intel_init_bsd2_ring_buffer(dev);
+   if (ret)
+   goto cleanup_vebox_ring;
+   }
 
ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
if (ret)
-   goto cleanup_vebox_ring;
+   goto cleanup_ring;
 
return 0;
 
+cleanup_ring:
+   intel_cleanup_ring_buffer(dev_priv-ring[VCS2]);
 cleanup_vebox_ring:
intel_cleanup_ring_buffer(dev_priv-ring[VECS]);
 cleanup_blt_ring:
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4865ade..3cab7f9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -42,6 +42,7 @@ static const char *ring_str(int ring)
case VCS: return bsd;
case BCS: return blt;
case VECS: return vebox;
+   case VCS2: return second bsd;
default: return ;
}
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8f84555..0b88508 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -760,6 +760,7 @@ enum punit_power_well {
 #define RENDER_RING_BASE   0x02000
 #define BSD_RING_BASE  0x04000
 #define GEN6_BSD_RING_BASE 0x12000
+#define GEN8_BSD2_RING_BASE0x1c000
 #define VEBOX_RING_BASE0x1a000
 #define BLT_RING_BASE  0x22000
 #define RING_TAIL(base)((base)+0x30)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eb3dd26..8b9b89080 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1924,10 +1924,12 @@ int intel_init_render_ring_buffer(struct drm_device 
*dev)
ring-semaphore_register[VCS] = MI_SEMAPHORE_SYNC_RV;
ring-semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB;
ring-semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE;
+   ring-semaphore_register[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
ring-signal_mbox[RCS] = GEN6_NOSYNC;
ring-signal_mbox[VCS] = GEN6_VRSYNC

[Intel-gfx] [PATCH V3 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3

2014-04-15 Thread Zhao Yakui
According to the hardware spec, BDW GT3 has a different configuration
from BDW GT1/GT2, so split the BDW device info definition.
This prepares for adding the dual BSD rings on BDW GT3 machines.

V1->V2: Follow Daniel's comment to pay attention to the stolen check for BDW
in kernel/early-quirks.c

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |   26 --
 include/drm/i915_pciids.h   |   22 +-
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5d8250f..17fbbe5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -279,6 +279,26 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
GEN_DEFAULT_PIPEOFFSETS,
 };
 
+static const struct intel_device_info intel_broadwell_gt3d_info = {
+   .gen = 8, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
+static const struct intel_device_info intel_broadwell_gt3m_info = {
+   .gen = 8, .is_mobile = 1, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
@@ -311,8 +331,10 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
INTEL_HSW_M_IDS(intel_haswell_m_info), \
INTEL_VLV_M_IDS(intel_valleyview_m_info),  \
INTEL_VLV_D_IDS(intel_valleyview_d_info),  \
-   INTEL_BDW_M_IDS(intel_broadwell_m_info),   \
-   INTEL_BDW_D_IDS(intel_broadwell_d_info)
+   INTEL_BDW_GT12M_IDS(intel_broadwell_m_info),   \
+   INTEL_BDW_GT12D_IDS(intel_broadwell_d_info),   \
+   INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \
+   INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info)
 
 static const struct pci_device_id pciidlist[] = {  /* aka */
INTEL_PCI_IDS,
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 940ece4..24f3cad 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -223,14 +223,26 @@
_INTEL_BDW_D(gt, 0x160A, info), /* Server */ \
_INTEL_BDW_D(gt, 0x160D, info) /* Workstation */
 
-#define INTEL_BDW_M_IDS(info) \
+#define INTEL_BDW_GT12M_IDS(info) \
_INTEL_BDW_M_IDS(1, info), \
-   _INTEL_BDW_M_IDS(2, info), \
-   _INTEL_BDW_M_IDS(3, info)
+   _INTEL_BDW_M_IDS(2, info)
 
-#define INTEL_BDW_D_IDS(info) \
+#define INTEL_BDW_GT12D_IDS(info) \
_INTEL_BDW_D_IDS(1, info), \
-   _INTEL_BDW_D_IDS(2, info), \
+   _INTEL_BDW_D_IDS(2, info)
+
+#define INTEL_BDW_GT3M_IDS(info) \
+   _INTEL_BDW_M_IDS(3, info)
+
+#define INTEL_BDW_GT3D_IDS(info) \
_INTEL_BDW_D_IDS(3, info)
 
+#define INTEL_BDW_M_IDS(info) \
+   INTEL_BDW_GT12M_IDS(info), \
+   INTEL_BDW_GT3M_IDS(info)
+
+#define INTEL_BDW_D_IDS(info) \
+   INTEL_BDW_GT12D_IDS(info), \
+   INTEL_BDW_GT3D_IDS(info)
+
 #endif /* _I915_PCIIDS_H */
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V3 5/6] drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space

2014-04-15 Thread Zhao Yakui
One extra ring is added in the kernel driver, but it is transparent to
user-space applications/middleware. As a result, the number of rings in the
kernel driver is larger than the number exposed to user-space, so a wrong
ring ID passed by user-space needs to be filtered out.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h|1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3491402..341ec68 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1035,7 +1035,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (args-flags  I915_EXEC_IS_PINNED)
flags |= I915_DISPATCH_PINNED;
 
-   if ((args-flags  I915_EXEC_RING_MASK)  I915_NUM_RINGS) {
+   if ((args-flags  I915_EXEC_RING_MASK)  LAST_USER_RING) {
DRM_DEBUG(execbuf with unknown ring: %d\n,
  (int)(args-flags  I915_EXEC_RING_MASK));
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8ca4285..59f4cdd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -64,6 +64,7 @@ struct  intel_ring_buffer {
VCS2,
} id;
 #define I915_NUM_RINGS 5
+#define LAST_USER_RING (VECS + 1)
u32 mmio_base;
void__iomem *virtual_start;
struct  drm_device *dev;
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V3 0/6] drm/i915: Add the support of dual BSD rings on BDW GT3

2014-04-15 Thread Zhao Yakui
This is the patch set that tries to add the support of dual BSD rings on BDW
GT3. Based on hardware spec, the BDW GT3 has two independent BSD rings, which
can be used to process the video commands. To be simpler, it is transparent 
to user-space driver/middleware. In such case the kernel driver will decide
which ring is to dispatch the BSD video command.

As each BSD ring is powerful enough on its own, it is sufficient to dispatch
the BSD video commands based on the drm fd. In that case different BSD rings
can be used for video playback and encoding.

V1->V2: Follow Daniel's comment to do the following update:
   a. consider the stolen check for BDW in kernel/early-quirks.c in patch 01
   b. update the comment in Patch 04
   c. use the simple ping-pong mechanism to add the support of dual BSD rings.
The further optimization will be considered in another patch set.

V2->V3: Follow Daniel's comment to use struct_mutex instead of
atomic_t when determining which ring is used to dispatch video commands.


Zhao Yakui (6):
  drm/i915: Split the BDW device definition to prepare for dual BSD
rings on BDW GT3
  drm/i915:Initialize the second BSD ring on BDW GT3 machine
  drm/i915:Handle the irq interrupt for the second BSD ring
  drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7
to remove the switch check warning
  drm/i915: Update the restrict check to filter out wrong Ring ID
passed by user-space
  drm/i915: Use the coarse ping-pong mechanism based on drm fd to
dispatch the BSD command on BDW GT3

 drivers/gpu/drm/i915/i915_dma.c|3 ++
 drivers/gpu/drm/i915/i915_drv.c|   26 +++-
 drivers/gpu/drm/i915/i915_drv.h|5 +++
 drivers/gpu/drm/i915/i915_gem.c|9 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   42 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c  |1 +
 drivers/gpu/drm/i915/i915_irq.c|5 ++-
 drivers/gpu/drm/i915/i915_reg.h|1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c|   59 
 drivers/gpu/drm/i915/intel_ringbuffer.h|5 ++-
 include/drm/i915_pciids.h  |   22 ---
 11 files changed, 166 insertions(+), 12 deletions(-)

-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V3 4/6] drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning

2014-04-15 Thread Zhao Yakui
Gen7 doesn't have the second BSD ring, but the compiler emits a switch check
warning during compilation when VCS2 is not handled. So just add the VCS2
case to remove the switch check warning.

V1->V2: Follow Daniel's comment to update the comment

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8b9b89080..2c89525 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -988,6 +988,11 @@ void intel_ring_setup_status_page(struct intel_ring_buffer 
*ring)
case BCS:
mmio = BLT_HWS_PGA_GEN7;
break;
+   /*
+* VCS2 actually doesn't exist on Gen7. Only shut up
+* gcc switch check warning
+*/
+   case VCS2:
case VCS:
mmio = BSD_HWS_PGA_GEN7;
break;
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V3 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3

2014-04-15 Thread Zhao Yakui
According to the hardware spec, BDW GT3 has a different configuration
from BDW GT1/GT2, so split the BDW device info definition.
This prepares for adding the dual BSD rings on BDW GT3 machines.

V1->V2: Follow Daniel's comment to pay attention to the stolen check for BDW
in kernel/early-quirks.c

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |   26 --
 include/drm/i915_pciids.h   |   22 +-
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5d8250f..17fbbe5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -279,6 +279,26 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
GEN_DEFAULT_PIPEOFFSETS,
 };
 
+static const struct intel_device_info intel_broadwell_gt3d_info = {
+   .gen = 8, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
+static const struct intel_device_info intel_broadwell_gt3m_info = {
+   .gen = 8, .is_mobile = 1, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
@@ -311,8 +331,10 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
INTEL_HSW_M_IDS(intel_haswell_m_info), \
INTEL_VLV_M_IDS(intel_valleyview_m_info),  \
INTEL_VLV_D_IDS(intel_valleyview_d_info),  \
-   INTEL_BDW_M_IDS(intel_broadwell_m_info),   \
-   INTEL_BDW_D_IDS(intel_broadwell_d_info)
+   INTEL_BDW_GT12M_IDS(intel_broadwell_m_info),   \
+   INTEL_BDW_GT12D_IDS(intel_broadwell_d_info),   \
+   INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \
+   INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info)
 
 static const struct pci_device_id pciidlist[] = {  /* aka */
INTEL_PCI_IDS,
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 940ece4..24f3cad 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -223,14 +223,26 @@
_INTEL_BDW_D(gt, 0x160A, info), /* Server */ \
_INTEL_BDW_D(gt, 0x160D, info) /* Workstation */
 
-#define INTEL_BDW_M_IDS(info) \
+#define INTEL_BDW_GT12M_IDS(info) \
_INTEL_BDW_M_IDS(1, info), \
-   _INTEL_BDW_M_IDS(2, info), \
-   _INTEL_BDW_M_IDS(3, info)
+   _INTEL_BDW_M_IDS(2, info)
 
-#define INTEL_BDW_D_IDS(info) \
+#define INTEL_BDW_GT12D_IDS(info) \
_INTEL_BDW_D_IDS(1, info), \
-   _INTEL_BDW_D_IDS(2, info), \
+   _INTEL_BDW_D_IDS(2, info)
+
+#define INTEL_BDW_GT3M_IDS(info) \
+   _INTEL_BDW_M_IDS(3, info)
+
+#define INTEL_BDW_GT3D_IDS(info) \
_INTEL_BDW_D_IDS(3, info)
 
+#define INTEL_BDW_M_IDS(info) \
+   INTEL_BDW_GT12M_IDS(info), \
+   INTEL_BDW_GT3M_IDS(info)
+
+#define INTEL_BDW_D_IDS(info) \
+   INTEL_BDW_GT12D_IDS(info), \
+   INTEL_BDW_GT3D_IDS(info)
+
 #endif /* _I915_PCIIDS_H */
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V3 4/6] drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning

2014-04-15 Thread Zhao Yakui
Gen7 doesn't have the second BSD ring, but the compiler emits a switch check
warning during compilation when VCS2 is not handled. So just add the VCS2
case to remove the switch check warning.

V1->V2: Follow Daniel's comment to update the comment

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8b9b89080..2c89525 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -988,6 +988,11 @@ void intel_ring_setup_status_page(struct intel_ring_buffer 
*ring)
case BCS:
mmio = BLT_HWS_PGA_GEN7;
break;
+   /*
+* VCS2 actually doesn't exist on Gen7. Only shut up
+* gcc switch check warning
+*/
+   case VCS2:
case VCS:
mmio = BSD_HWS_PGA_GEN7;
break;
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V3 6/6] drm/i915: Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-15 Thread Zhao Yakui
The BDW GT3 has two independent BSD rings, which can be used to process
video commands. To keep things simple, this is transparent to the user-space
driver/middleware. Instead, the kernel driver decides which ring is used to
dispatch the BSD video command.

As each BSD ring is powerful enough on its own, it is sufficient to dispatch
the BSD video commands based on the drm fd. In that case one video stream can
be played back while another video stream is being encoded. A coarse ping-pong
mechanism is used to determine which BSD ring dispatches the BSD video command.

V1->V2: Follow Daniel's comment and use the simple ping-pong mechanism.
This only adds support for dual BSD rings on BDW GT3 machines.
Further optimization will be considered in another patch set.

V2->V3: Follow Daniel's comment to use struct_mutex instead of
atomic_t when determining which ring is used to dispatch video commands.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c|3 +++
 drivers/gpu/drm/i915/i915_drv.h|3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   40 +++-
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0b38f88..f7558f5 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
long flags)
spin_lock_init(dev_priv-backlight_lock);
spin_lock_init(dev_priv-uncore.lock);
spin_lock_init(dev_priv-mm.object_stat_lock);
+   dev_priv-ring_index = 0;
mutex_init(dev_priv-dpio_lock);
mutex_init(dev_priv-modeset_restore_lock);
 
@@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct 
drm_file *file)
 {
struct drm_i915_file_private *file_priv = file-driver_priv;
 
+   if (file_priv  file_priv-bsd_ring)
+   file_priv-bsd_ring = NULL;
kfree(file_priv);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74aef6a..032f992 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1472,6 +1472,8 @@ struct drm_i915_private {
struct i915_dri1_state dri1;
/* Old ums support infrastructure, same warning applies. */
struct i915_ums_state ums;
+   /* the indicator for dispatch video commands on two BSD rings */
+   int ring_index;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -1679,6 +1681,7 @@ struct drm_i915_file_private {
 
struct i915_hw_context *private_default_ctx;
atomic_t rps_wait_boost;
+   struct  intel_ring_buffer *bsd_ring;
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 341ec68..1dc6f03 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -999,6 +999,37 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
 }
 
+/**
+ * Find one BSD ring to dispatch the corresponding BSD command.
+ * The Ring ID is returned.
+ */
+static int gen8_dispatch_bsd_ring(struct drm_device *dev,
+ struct drm_file *file)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_i915_file_private *file_priv = file-driver_priv;
+
+   /* Check whether the file_priv is using one ring */
+   if (file_priv-bsd_ring)
+   return file_priv-bsd_ring-id;
+   else {
+   /* If no, use the ping-pong mechanism to select one ring */
+   int ring_id;
+
+   mutex_lock(dev-struct_mutex);
+   if (dev_priv-ring_index == 0) {
+   ring_id = VCS;
+   dev_priv-ring_index = 1;
+   } else {
+   ring_id = VCS2;
+   dev_priv-ring_index = 0;
+   }
+   file_priv-bsd_ring = dev_priv-ring[ring_id];
+   mutex_unlock(dev-struct_mutex);
+   return ring_id;
+   }
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
   struct drm_file *file,
@@ -1043,7 +1074,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
 
if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
ring = dev_priv-ring[RCS];
-   else
+   else if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
+   if (HAS_BSD2(dev)) {
+   int ring_id;
+   ring_id = gen8_dispatch_bsd_ring(dev, file);
+   ring = dev_priv-ring[ring_id];
+   } else
+   ring = dev_priv-ring[VCS];
+   } else
ring = dev_priv-ring[(args-flags  I915_EXEC_RING_MASK) - 1];
 
if (!intel_ring_initialized(ring)) {
-- 
1.7.10.1

[Intel-gfx] [PATCH V3 5/6] drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space

2014-04-15 Thread Zhao Yakui
One extra ring is added in the kernel driver, but it is transparent to
user-space applications/middleware. As a result, the number of rings in the
kernel driver is larger than the number exposed to user-space, so a wrong
ring ID passed by user-space needs to be filtered out.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h|1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3491402..341ec68 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1035,7 +1035,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (args-flags  I915_EXEC_IS_PINNED)
flags |= I915_DISPATCH_PINNED;
 
-   if ((args-flags  I915_EXEC_RING_MASK)  I915_NUM_RINGS) {
+   if ((args-flags  I915_EXEC_RING_MASK)  LAST_USER_RING) {
DRM_DEBUG(execbuf with unknown ring: %d\n,
  (int)(args-flags  I915_EXEC_RING_MASK));
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8ca4285..59f4cdd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -64,6 +64,7 @@ struct  intel_ring_buffer {
VCS2,
} id;
 #define I915_NUM_RINGS 5
+#define LAST_USER_RING (VECS + 1)
u32 mmio_base;
void__iomem *virtual_start;
struct  drm_device *dev;
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V3 0/6] drm/i915: Add the support of dual BSD rings on BDW GT3

2014-04-15 Thread Zhao Yakui
This is the patch set that tries to add the support of dual BSD rings on BDW
GT3. Based on hardware spec, the BDW GT3 has two independent BSD rings, which
can be used to process the video commands. To be simpler, it is transparent 
to user-space driver/middleware. In such case the kernel driver will decide
which ring is to dispatch the BSD video command.

As each BSD ring is powerful enough on its own, it is sufficient to dispatch
the BSD video commands based on the drm fd. In that case different BSD rings
can be used for video playback and encoding.

V1->V2: Follow Daniel's comment to do the following update:
   a. consider the stolen check for BDW in kernel/early-quirks.c in patch 01
   b. update the comment in Patch 04
   c. use the simple ping-pong mechanism to add the support of dual BSD rings.
The further optimization will be considered in another patch set.

V2->V3: Follow Daniel's comment to use struct_mutex instead of
atomic_t when determining which ring is used to dispatch video commands.


Zhao Yakui (6):
  drm/i915: Split the BDW device definition to prepare for dual BSD
rings on BDW GT3
  drm/i915:Initialize the second BSD ring on BDW GT3 machine
  drm/i915:Handle the irq interrupt for the second BSD ring
  drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7
to remove the switch check warning
  drm/i915: Update the restrict check to filter out wrong Ring ID
passed by user-space
  drm/i915: Use the coarse ping-pong mechanism based on drm fd to
dispatch the BSD command on BDW GT3

 drivers/gpu/drm/i915/i915_dma.c|3 ++
 drivers/gpu/drm/i915/i915_drv.c|   26 +++-
 drivers/gpu/drm/i915/i915_drv.h|5 +++
 drivers/gpu/drm/i915/i915_gem.c|9 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   42 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c  |1 +
 drivers/gpu/drm/i915/i915_irq.c|5 ++-
 drivers/gpu/drm/i915/i915_reg.h|1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c|   59 
 drivers/gpu/drm/i915/intel_ringbuffer.h|5 ++-
 include/drm/i915_pciids.h  |   22 ---
 11 files changed, 166 insertions(+), 12 deletions(-)

-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V3 3/6] drm/i915:Handle the irq interrupt for the second BSD ring

2014-04-15 Thread Zhao Yakui
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_irq.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7a4d3ae..63bd5de 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1347,13 +1347,16 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
DRM_ERROR(The master control interrupt lied (GT0)!\n);
}
 
-   if (master_ctl  GEN8_GT_VCS1_IRQ) {
+   if (master_ctl  (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
tmp = I915_READ(GEN8_GT_IIR(1));
if (tmp) {
ret = IRQ_HANDLED;
vcs = tmp  GEN8_VCS1_IRQ_SHIFT;
if (vcs  GT_RENDER_USER_INTERRUPT)
notify_ring(dev, dev_priv-ring[VCS]);
+   vcs = tmp  GEN8_VCS2_IRQ_SHIFT;
+   if (vcs  GT_RENDER_USER_INTERRUPT)
+   notify_ring(dev, dev_priv-ring[VCS2]);
I915_WRITE(GEN8_GT_IIR(1), tmp);
} else
DRM_ERROR(The master control interrupt lied (GT1)!\n);
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V3 2/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine

2014-04-15 Thread Zhao Yakui
Based on the hardware spec, the BDW GT3 machine has two independent
BSD rings that can be used to dispatch video commands.
So initialize the second BSD ring as well.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |4 +--
 drivers/gpu/drm/i915/i915_drv.h |2 ++
 drivers/gpu/drm/i915/i915_gem.c |9 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |1 +
 drivers/gpu/drm/i915/i915_reg.h |1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   54 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++-
 7 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 17fbbe5..2a7842b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -282,7 +282,7 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
 static const struct intel_device_info intel_broadwell_gt3d_info = {
.gen = 8, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
@@ -292,7 +292,7 @@ static const struct intel_device_info 
intel_broadwell_gt3d_info = {
 static const struct intel_device_info intel_broadwell_gt3m_info = {
.gen = 8, .is_mobile = 1, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 92c3095..74aef6a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table {
 #define BSD_RING   (1VCS)
 #define BLT_RING   (1BCS)
 #define VEBOX_RING (1VECS)
+#define BSD2_RING  (1VCS2)
 #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask  BSD_RING)
+#define HAS_BSD2(dev)  (INTEL_INFO(dev)-ring_mask  BSD2_RING)
 #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask  BLT_RING)
 #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask  VEBOX_RING)
 #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85c9cf0..b4dcf2a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_blt_ring;
}
 
+   if (HAS_BSD2(dev)) {
+   ret = intel_init_bsd2_ring_buffer(dev);
+   if (ret)
+   goto cleanup_vebox_ring;
+   }
 
ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
if (ret)
-   goto cleanup_vebox_ring;
+   goto cleanup_ring;
 
return 0;
 
+cleanup_ring:
+   intel_cleanup_ring_buffer(dev_priv-ring[VCS2]);
 cleanup_vebox_ring:
intel_cleanup_ring_buffer(dev_priv-ring[VECS]);
 cleanup_blt_ring:
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4865ade..3cab7f9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -42,6 +42,7 @@ static const char *ring_str(int ring)
case VCS: return bsd;
case BCS: return blt;
case VECS: return vebox;
+   case VCS2: return second bsd;
default: return ;
}
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8f84555..0b88508 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -760,6 +760,7 @@ enum punit_power_well {
 #define RENDER_RING_BASE   0x02000
 #define BSD_RING_BASE  0x04000
 #define GEN6_BSD_RING_BASE 0x12000
+#define GEN8_BSD2_RING_BASE0x1c000
 #define VEBOX_RING_BASE0x1a000
 #define BLT_RING_BASE  0x22000
 #define RING_TAIL(base)((base)+0x30)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eb3dd26..8b9b89080 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1924,10 +1924,12 @@ int intel_init_render_ring_buffer(struct drm_device 
*dev)
ring-semaphore_register[VCS] = MI_SEMAPHORE_SYNC_RV;
ring-semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB;
ring-semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE;
+   ring-semaphore_register[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
ring-signal_mbox[RCS] = GEN6_NOSYNC;
ring-signal_mbox[VCS] = GEN6_VRSYNC

Re: [Intel-gfx] [PATCH V2 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3

2014-04-14 Thread Zhao Yakui
On Mon, 2014-04-14 at 01:09 -0600, Daniel Vetter wrote:
 On Mon, Apr 14, 2014 at 12:21:39PM +0800, Zhao Yakui wrote:
  V1->V2: Follow Daniel's comment to consider the stolen check for BDW in
  kernel/early-quirks.c
 
 Small style nit: We usually put the patch changelog at the end of the
 commit message. That way the core commit message is clearly separated from
 the per-patch changelog. In rare cases there's some confusion otherwise.
 No need to resend just for that.

Thanks for your advice.

I will pay attention to the style nit next time.

Thanks.
Yakui

 -Daniel
 
  
  Based on the hardware spec, the BDW GT3 has a different configuration
  from the BDW GT1/GT2, so split the BDW device info definition.
  This prepares for adding the dual BSD rings on BDW GT3 machines.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
  ---
   drivers/gpu/drm/i915/i915_drv.c |   26 --
   include/drm/i915_pciids.h   |   22 +-
   2 files changed, 41 insertions(+), 7 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_drv.c 
  b/drivers/gpu/drm/i915/i915_drv.c
  index 5d8250f..17fbbe5 100644
  --- a/drivers/gpu/drm/i915/i915_drv.c
  +++ b/drivers/gpu/drm/i915/i915_drv.c
  @@ -279,6 +279,26 @@ static const struct intel_device_info 
  intel_broadwell_m_info = {
  GEN_DEFAULT_PIPEOFFSETS,
   };
   
  +static const struct intel_device_info intel_broadwell_gt3d_info = {
  +   .gen = 8, .num_pipes = 3,
  +   .need_gfx_hws = 1, .has_hotplug = 1,
  +   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
  +   .has_llc = 1,
  +   .has_ddi = 1,
  +   .has_fbc = 1,
  +   GEN_DEFAULT_PIPEOFFSETS,
  +};
  +
  +static const struct intel_device_info intel_broadwell_gt3m_info = {
  +   .gen = 8, .is_mobile = 1, .num_pipes = 3,
  +   .need_gfx_hws = 1, .has_hotplug = 1,
  +   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
  +   .has_llc = 1,
  +   .has_ddi = 1,
  +   .has_fbc = 1,
  +   GEN_DEFAULT_PIPEOFFSETS,
  +};
  +
   /*
* Make sure any device matches here are from most specific to most
* general.  For example, since the Quanta match is based on the subsystem
  @@ -311,8 +331,10 @@ static const struct intel_device_info 
  intel_broadwell_m_info = {
  INTEL_HSW_M_IDS(intel_haswell_m_info), \
  INTEL_VLV_M_IDS(intel_valleyview_m_info),  \
  INTEL_VLV_D_IDS(intel_valleyview_d_info),  \
  -   INTEL_BDW_M_IDS(intel_broadwell_m_info),   \
  -   INTEL_BDW_D_IDS(intel_broadwell_d_info)
  +   INTEL_BDW_GT12M_IDS(intel_broadwell_m_info),   \
  +   INTEL_BDW_GT12D_IDS(intel_broadwell_d_info),   \
  +   INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \
  +   INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info)
   
   static const struct pci_device_id pciidlist[] = {  /* aka */
  INTEL_PCI_IDS,
  diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
  index 940ece4..24f3cad 100644
  --- a/include/drm/i915_pciids.h
  +++ b/include/drm/i915_pciids.h
  @@ -223,14 +223,26 @@
  _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \
  _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */
   
  -#define INTEL_BDW_M_IDS(info) \
  +#define INTEL_BDW_GT12M_IDS(info) \
  _INTEL_BDW_M_IDS(1, info), \
  -   _INTEL_BDW_M_IDS(2, info), \
  -   _INTEL_BDW_M_IDS(3, info)
  +   _INTEL_BDW_M_IDS(2, info)
   
  -#define INTEL_BDW_D_IDS(info) \
  +#define INTEL_BDW_GT12D_IDS(info) \
  _INTEL_BDW_D_IDS(1, info), \
  -   _INTEL_BDW_D_IDS(2, info), \
  +   _INTEL_BDW_D_IDS(2, info)
  +
  +#define INTEL_BDW_GT3M_IDS(info) \
  +   _INTEL_BDW_M_IDS(3, info)
  +
  +#define INTEL_BDW_GT3D_IDS(info) \
  _INTEL_BDW_D_IDS(3, info)
   
  +#define INTEL_BDW_M_IDS(info) \
  +   INTEL_BDW_GT12M_IDS(info), \
  +   INTEL_BDW_GT3M_IDS(info)
  +
  +#define INTEL_BDW_D_IDS(info) \
  +   INTEL_BDW_GT12D_IDS(info), \
  +   INTEL_BDW_GT3D_IDS(info)
  +
   #endif /* _I915_PCIIDS_H */
  -- 
  1.7.10.1
  
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH I-g-t 2/2] tests: Add dummy_reloc test case based on multi drm_fd to test CPU-GPU sync under multi BSD rings

2014-04-14 Thread Zhao Yakui
On Mon, 2014-04-14 at 01:06 -0600, Daniel Vetter wrote:
 On Mon, Apr 14, 2014 at 12:19:58PM +0800, Zhao Yakui wrote:
  The Broadwell GT3 machine has two independent BSD rings in kernel driver 
  while
  it is transparent to the user-space driver. In such case it needs to check
  the CPU-GPU sync for the second BSD ring. Multi drm_fd can assure that the
  second BSD ring has the opportunity to dispatch the GPU command.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
  ---
   tests/Makefile.sources|1 +
   tests/gem_dummy_reloc_multi_bsd.c |  258 
  +
 
 I've meant that you add a new subtest to the existing gem_dummy_reloc
 test. With your patch here we essentially duplicate all the tests for the
 other rings.
 
   2 files changed, 259 insertions(+)
   create mode 100644 tests/gem_dummy_reloc_multi_bsd.c
  
  diff --git a/tests/Makefile.sources b/tests/Makefile.sources
  index 254a5c5..98f277f 100644
  --- a/tests/Makefile.sources
  +++ b/tests/Makefile.sources
  @@ -105,6 +105,7 @@ TESTS_progs = \
  gem_ring_sync_copy \
  gem_ring_sync_loop \
  gem_multi_bsd_sync_loop \
  +   gem_dummy_reloc_multi_bsd \
 
 Tests with subtests must be added to the TESTS_progs_M variable, otherwise
 piglit won't be able to enumerate the subtests. That's just an fyi for the
 next testcase, like I've said here it's imo better to just add a new
 subtest.
 

Thanks for explaining the rule about how to add a test with subtests. (Sorry,
I didn't know about this rule.)

OK. I will follow your comment to add it as subtests.

Thanks.
Yakui

 Also you've forgotten to update .gitignore, when building with your patch
 git status shows some not-added binaries.
 -Daniel
 
  gem_seqno_wrap \
  gem_set_tiling_vs_gtt \
  gem_set_tiling_vs_pwrite \
  diff --git a/tests/gem_dummy_reloc_multi_bsd.c 
  b/tests/gem_dummy_reloc_multi_bsd.c
  new file mode 100644
  index 000..ef8213e
  --- /dev/null
  +++ b/tests/gem_dummy_reloc_multi_bsd.c
  @@ -0,0 +1,258 @@
  +/*
  + * Copyright © 2014 Intel Corporation
  + *
  + * Permission is hereby granted, free of charge, to any person obtaining a
  + * copy of this software and associated documentation files (the 
  Software),
  + * to deal in the Software without restriction, including without 
  limitation
  + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  + * and/or sell copies of the Software, and to permit persons to whom the
  + * Software is furnished to do so, subject to the following conditions:
  + *
  + * The above copyright notice and this permission notice (including the 
  next
  + * paragraph) shall be included in all copies or substantial portions of 
  the
  + * Software.
  + *
  + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
  OR
  + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
  OTHER
  + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
  DEALINGS
  + * IN THE SOFTWARE.
  + *
  + * Authors:
  + *Daniel Vetter daniel.vet...@ffwll.ch (based on 
  gem_dummy_reloc_loop*.c)
  + *Zhao Yakui yakui.z...@intel.com
  + *
  + */
  +
  +#include stdlib.h
  +#include stdio.h
  +#include string.h
  +#include fcntl.h
  +#include inttypes.h
  +#include errno.h
  +#include sys/stat.h
  +#include sys/time.h
  +#include drm.h
  +#include ioctl_wrappers.h
  +#include drmtest.h
  +#include intel_bufmgr.h
  +#include intel_batchbuffer.h
  +#include intel_io.h
  +#include i830_reg.h
  +#include intel_chipset.h
  +
  +#define LOCAL_I915_EXEC_VEBOX (40)
  +
  +static drm_intel_bufmgr *bufmgr;
  +struct intel_batchbuffer *batch;
  +static drm_intel_bo *target_buffer;
  +
  +#define NUM_FD 50
  +
  +static int mfd[NUM_FD];
  +static drm_intel_bufmgr *mbufmgr[NUM_FD];
  +static struct intel_batchbuffer *mbatch[NUM_FD];
  +static drm_intel_bo *mbuffer[NUM_FD];
  +
  +
  +/*
  + * Testcase: Basic check of ring-cpu sync using a dummy reloc under 
  multi-fd
  + *
  + * The last test (that randomly switches the ring) seems to be pretty 
  effective
  + * at hitting the missed irq bug that's worked around with the HWSTAM irq 
  write.
  + */
  +
  +
  +#define MI_COND_BATCH_BUFFER_END   (0x3623 | 1)
  +#define MI_DO_COMPARE  (121)
  +static void
  +dummy_reloc_loop(int ring)
  +{
  +   int i;
  +   srandom(0xdeadbeef);
  +
  +   for (i = 0; i  0x10; i++) {
  +   int mindex = random() % NUM_FD;
  +
  +   batch = mbatch[mindex];
  +   if (ring == I915_EXEC_RENDER) {
  +   BEGIN_BATCH(4);
  +   OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
  +   OUT_BATCH(0x); /* compare dword

Re: [Intel-gfx] [PATCH I-g-t 2/2] tests: Add dummy_reloc test case based on multi drm_fd to test CPU-GPU sync under multi BSD rings

2014-04-14 Thread Zhao Yakui
On Mon, 2014-04-14 at 01:06 -0600, Daniel Vetter wrote:
 On Mon, Apr 14, 2014 at 12:19:58PM +0800, Zhao Yakui wrote:
  The Broadwell GT3 machine has two independent BSD rings in kernel driver 
  while
  it is transparent to the user-space driver. In such case it needs to check
  the CPU-GPU sync for the second BSD ring. Multi drm_fd can assure that the
  second BSD ring has the opportunity to dispatch the GPU command.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
  ---
   tests/Makefile.sources|1 +
   tests/gem_dummy_reloc_multi_bsd.c |  258 
  +
 
 I've meant that you add a new subtest to the existing gem_dummy_reloc
 test. With your patch here we essentially duplicate all the tests for the
 other rings.
 
   2 files changed, 259 insertions(+)
   create mode 100644 tests/gem_dummy_reloc_multi_bsd.c
  
  diff --git a/tests/Makefile.sources b/tests/Makefile.sources
  index 254a5c5..98f277f 100644
  --- a/tests/Makefile.sources
  +++ b/tests/Makefile.sources
  @@ -105,6 +105,7 @@ TESTS_progs = \
  gem_ring_sync_copy \
  gem_ring_sync_loop \
  gem_multi_bsd_sync_loop \
  +   gem_dummy_reloc_multi_bsd \
 
 Tests with subtests must be added to the TESTS_progs_M variable, otherwise
 piglit won't be able to enumerate the subtests. That's just an fyi for the
 next testcase, like I've said here it's imo better to just add a new
 subtest.
 

Thanks for explaining the rule about how to add a test with subtests. (Sorry,
I didn't know about this rule.)

OK. I will follow your comment to add it as subtests.

 Also you've forgotten to update .gitignore, when building with your patch
 git status shows some not-added binaries.

BTW: How do I update the .gitignore?
In my workflow I usually use the following steps to create the corresponding
patches before sending, and I never update the .gitignore.
 a. use the quilt tool to create the patch
 b. use git am to apply the corresponding patch on the working tree
 c. use git format-patch to get the corresponding patches that can
be sent by using git-send-email

I appreciate your help.

Thanks.
Yakui
 -Daniel
 
  gem_seqno_wrap \
  gem_set_tiling_vs_gtt \
  gem_set_tiling_vs_pwrite \
  diff --git a/tests/gem_dummy_reloc_multi_bsd.c 
  b/tests/gem_dummy_reloc_multi_bsd.c
  new file mode 100644
  index 000..ef8213e
  --- /dev/null
  +++ b/tests/gem_dummy_reloc_multi_bsd.c
  @@ -0,0 +1,258 @@
  +/*
  + * Copyright © 2014 Intel Corporation
  + *
  + * Permission is hereby granted, free of charge, to any person obtaining a
  + * copy of this software and associated documentation files (the 
  Software),
  + * to deal in the Software without restriction, including without 
  limitation
  + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  + * and/or sell copies of the Software, and to permit persons to whom the
  + * Software is furnished to do so, subject to the following conditions:
  + *
  + * The above copyright notice and this permission notice (including the 
  next
  + * paragraph) shall be included in all copies or substantial portions of 
  the
  + * Software.
  + *
  + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
  OR
  + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
  OTHER
  + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
  DEALINGS
  + * IN THE SOFTWARE.
  + *
  + * Authors:
  + *Daniel Vetter daniel.vet...@ffwll.ch (based on 
  gem_dummy_reloc_loop*.c)
  + *Zhao Yakui yakui.z...@intel.com
  + *
  + */
  +
  +#include stdlib.h
  +#include stdio.h
  +#include string.h
  +#include fcntl.h
  +#include inttypes.h
  +#include errno.h
  +#include sys/stat.h
  +#include sys/time.h
  +#include drm.h
  +#include ioctl_wrappers.h
  +#include drmtest.h
  +#include intel_bufmgr.h
  +#include intel_batchbuffer.h
  +#include intel_io.h
  +#include i830_reg.h
  +#include intel_chipset.h
  +
  +#define LOCAL_I915_EXEC_VEBOX (40)
  +
  +static drm_intel_bufmgr *bufmgr;
  +struct intel_batchbuffer *batch;
  +static drm_intel_bo *target_buffer;
  +
  +#define NUM_FD 50
  +
  +static int mfd[NUM_FD];
  +static drm_intel_bufmgr *mbufmgr[NUM_FD];
  +static struct intel_batchbuffer *mbatch[NUM_FD];
  +static drm_intel_bo *mbuffer[NUM_FD];
  +
  +
  +/*
  + * Testcase: Basic check of ring-cpu sync using a dummy reloc under 
  multi-fd
  + *
  + * The last test (that randomly switches the ring) seems to be pretty 
  effective
  + * at hitting the missed irq bug that's worked around with the HWSTAM irq 
  write.
  + */
  +
  +
  +#define MI_COND_BATCH_BUFFER_END   (0x3623 | 1)
  +#define MI_DO_COMPARE  (121)
  +static void
  +dummy_reloc_loop(int ring

Re: [Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-14 Thread Zhao Yakui
On Mon, 2014-04-14 at 01:22 -0600, Daniel Vetter wrote:
 On Mon, Apr 14, 2014 at 12:21:44PM +0800, Zhao Yakui wrote:
  V1->V2: Follow Daniel's comment and use the simple ping-pong mechanism.
  This only adds support for the dual BSD rings on BDW GT3 machines.
  Further optimization will be considered in another patch set.
  
  The BDW GT3 has two independent BSD rings, which can be used to process
  video commands. To keep things simple, this is transparent to the user-space
  driver/middleware. Instead, the kernel driver decides which ring is used to
  dispatch the BSD video command.
  
  As every BSD ring is powerful, it is enough to dispatch the BSD video
  command based on the drm fd. In that case one video stream can be played
  back while another video stream is being encoded. The coarse ping-pong
  mechanism is used to determine which BSD ring dispatches the BSD video
  command.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
  ---
   drivers/gpu/drm/i915/i915_dma.c|3 +++
   drivers/gpu/drm/i915/i915_drv.h|3 +++
   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   37 
  +++-
   3 files changed, 42 insertions(+), 1 deletion(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_dma.c 
  b/drivers/gpu/drm/i915/i915_dma.c
  index 0b38f88..4d27cf4 100644
  --- a/drivers/gpu/drm/i915/i915_dma.c
  +++ b/drivers/gpu/drm/i915/i915_dma.c
  @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
  long flags)
  spin_lock_init(dev_priv-backlight_lock);
  spin_lock_init(dev_priv-uncore.lock);
  spin_lock_init(dev_priv-mm.object_stat_lock);
  +   atomic_set(dev_priv-bsd_cmd_counter, 0);
  mutex_init(dev_priv-dpio_lock);
  mutex_init(dev_priv-modeset_restore_lock);
   
  @@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, 
  struct drm_file *file)
   {
  struct drm_i915_file_private *file_priv = file-driver_priv;
   
  +   if (file_priv  file_priv-bsd_ring)
  +   file_priv-bsd_ring = NULL;
  kfree(file_priv);
   }
   
  diff --git a/drivers/gpu/drm/i915/i915_drv.h 
  b/drivers/gpu/drm/i915/i915_drv.h
  index ac5598c3..68e8166 100644
  --- a/drivers/gpu/drm/i915/i915_drv.h
  +++ b/drivers/gpu/drm/i915/i915_drv.h
  @@ -1466,6 +1466,8 @@ struct drm_i915_private {
  struct i915_dri1_state dri1;
  /* Old ums support infrastructure, same warning applies. */
  struct i915_ums_state ums;
  +   /* the lock for dispatch video commands on two BSD rings */
  +   atomic_t bsd_cmd_counter;
 
 You're still using atomic_t for no real good reason.
 gen8_dispatch_bsd_ring is always called with the dev-struct_mutex lock
 held, so there's really no reason for it.

If the struct_mutex is held in gen8_dispatch_bsd_ring, I can remove
the atomic_t.
However, the struct_mutex is a big lock and it is used very
frequently (i915_gem.c, i915_dma.c and so on). In my view it is a little
heavier than an atomic_t when all we do is increment one counter and return it.

If you think that relying on the mutex is better than the atomic, I will
follow your advice.

Thanks.
Yakui

 -Daniel
 
   };
   
   static inline struct drm_i915_private *to_i915(const struct drm_device 
  *dev)
  @@ -1673,6 +1675,7 @@ struct drm_i915_file_private {
   
  struct i915_hw_context *private_default_ctx;
  atomic_t rps_wait_boost;
  +   struct  intel_ring_buffer *bsd_ring;
   };
   
   /*
  diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
  b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  index 341ec68..720ef17 100644
  --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  @@ -999,6 +999,34 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
  return 0;
   }
   
  +/**
  + * Find one BSD ring to dispatch the corresponding BSD command.
  + * The Ring ID is returned.
  + */
  +static int gen8_dispatch_bsd_ring(struct drm_device *dev,
  + struct drm_file *file)
  +{
  +   struct drm_i915_private *dev_priv = dev-dev_private;
  +   struct drm_i915_file_private *file_priv = file-driver_priv;
  +
  +   /* Check whether the file_priv is using one ring */
  +   if (file_priv-bsd_ring)
  +   return file_priv-bsd_ring-id;
  +   else {
  +   /* If no, use the ping-pong mechanism to select one ring */
  +   int counter, ring_id;
  +   smp_mb__before_atomic_inc();
  +   counter = atomic_inc_return(dev_priv-bsd_cmd_counter);
  +   if (counter % 2 == 0)
  +   ring_id = VCS;
  +   else
  +   ring_id = VCS2;
  +
  +   file_priv-bsd_ring = dev_priv-ring[ring_id];
  +   return ring_id;
  +   }
  +}
  +
   static int
   i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 struct drm_file *file,
  @@ -1043,7 +1071,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
  *data,
   
  if ((args-flags  I915_EXEC_RING_MASK

Re: [Intel-gfx] [PATCH I-g-t 2/2] tests: Add dummy_reloc test case based on multi drm_fd to test CPU-GPU sync under multi BSD rings

2014-04-14 Thread Zhao Yakui
On Mon, 2014-04-14 at 01:55 -0600, Daniel Vetter wrote:
 On Mon, Apr 14, 2014 at 9:32 AM, Zhao Yakui yakui.z...@intel.com wrote:
  BTW: How do I update the .gitignore?
  In my test I usually use the following step to create the corresponding
  patches before sending and never update the .gitignore.
   a. use quilt tool to create it
   b. use git am to apply the corresponding patch on the working tree
   c. use git format-patch to get the corresponding patches that can
  be sent by using git-send-email
 
 It's a normal file in the corresponding directory. You can just edit
 it and add it to the patch.

Thanks.
Yakui
 -Daniel


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-14 Thread Zhao Yakui
On Mon, 2014-04-14 at 02:19 -0600, Chris Wilson wrote:
 On Mon, Apr 14, 2014 at 04:05:19PM +0800, Zhao Yakui wrote:
  On Mon, 2014-04-14 at 01:22 -0600, Daniel Vetter wrote:
   You're still using atomic_t for no real good reason.
   gen8_dispatch_bsd_ring is always called with the dev-struct_mutex lock
   held, so there's really no reason for it.
  
  If the struct_mutex is used in the gen8_dispatch_bsd_ring, I can remove
  the atomic_t. 
  It seems that the struct_mutex is a big lock and it is used very
  frequently(i915_gem.c, i915_dma.c and so on). In my point it is a little
  heavier than the atomic_t if one counter is increased and returned. 
  
  If you think that the mutex is better than atomic, I will follow your
  advice.
 
 You are already holding the struct_mutex whenever we touch the ring and
 execbuffer. Even in a fine-grained world, there will still be a mutex
 around all operations that touch the rings.

Hi, Chris

I understand your concern. From the source code, the struct_mutex
is held while doing the buffer relocation and dispatching the
command on a ring.
But my code only selects one BSD ring. In that case the
atomic_t is sufficient and it is unnecessary to hold the struct_mutex.
If you also think that the struct_mutex is better, I can update the
code to use the struct_mutex.

Thanks.
Yakui


 -Chris
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH I-g-t V2 2/2] tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings

2014-04-14 Thread Zhao Yakui
The Broadwell GT3 machine has two independent BSD rings in kernel driver while
it is transparent to the user-space driver. In such case it needs to check
the CPU-GPU sync for the second BSD ring.

V1->V2: Follow Daniel's comment to add one subtest instead of one individual
test case, which is used to test the CPU-GPU sync under multi BSD rings.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 tests/gem_dummy_reloc_loop.c |  102 +-
 1 file changed, 101 insertions(+), 1 deletion(-)

diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c
index a61b59b..660d8e1 100644
--- a/tests/gem_dummy_reloc_loop.c
+++ b/tests/gem_dummy_reloc_loop.c
@@ -48,6 +48,13 @@ static drm_intel_bufmgr *bufmgr;
 struct intel_batchbuffer *batch;
 static drm_intel_bo *target_buffer;
 
+#define NUM_FD 50
+
+static int mfd[NUM_FD];
+static drm_intel_bufmgr *mbufmgr[NUM_FD];
+static struct intel_batchbuffer *mbatch[NUM_FD];
+static drm_intel_bo *mbuffer[NUM_FD];
+
 /*
  * Testcase: Basic check of ring-cpu sync using a dummy reloc
  *
@@ -124,6 +131,50 @@ dummy_reloc_loop_random_ring(int num_rings)
}
 }
 
+static void
+dummy_reloc_loop_random_ring_multi_fd(int num_rings)
+{
+   int i;
+   struct intel_batchbuffer *saved_batch;
+
+   saved_batch = batch;
+
+   srandom(0xdeadbeef);
+
+   for (i = 0; i  0x10; i++) {
+   int mindex;
+   int ring = random() % num_rings + 1;
+
+   mindex = random() % NUM_FD;
+   batch = mbatch[mindex];
+
+   if (ring == I915_EXEC_RENDER) {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+   OUT_BATCH(0x); /* compare dword */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP);
+   ADVANCE_BATCH();
+   } else {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_FLUSH_DW | 1);
+   OUT_BATCH(0); /* reserved */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP | (122) | (0xf));
+   ADVANCE_BATCH();
+   }
+   intel_batchbuffer_flush_on_ring(batch, ring);
+
+   drm_intel_bo_map(target_buffer, 0);
+   // map to force waiting on rendering
+   drm_intel_bo_unmap(target_buffer);
+   }
+
+   batch = saved_batch;
+}
+
 int fd;
 int devid;
 int num_rings;
@@ -133,6 +184,7 @@ igt_main
igt_skip_on_simulation();
 
igt_fixture {
+   int i;
fd = drm_open_any();
devid = intel_get_drm_devid(fd);
num_rings = gem_get_num_rings(fd);
@@ -148,6 +200,35 @@ igt_main
 
target_buffer = drm_intel_bo_alloc(bufmgr, target bo, 4096, 
4096);
igt_assert(target_buffer);
+
+   /* Create multi drm_fd and map one gem object to multi 
gem_contexts */
+   {
+   unsigned int target_flink;
+   char buffer_name[32];
+   if (dri_bo_flink(target_buffer, target_flink)) {
+   printf(fail to get flink for target buffer\n);
+   igt_assert(0);
+   }
+   for (i = 0; i  NUM_FD; i++) {
+   mfd[i] = 0;
+   mbufmgr[i] = NULL;
+   mbuffer[i] = NULL;
+   }
+   for (i = 0; i  NUM_FD; i++) {
+   sprintf(buffer_name, Target buffer %d\n, i);
+   mfd[i] = drm_open_any();
+   mbufmgr[i] = drm_intel_bufmgr_gem_init(mfd[i], 
4096);
+   igt_assert(mbufmgr[i]);
+   drm_intel_bufmgr_gem_enable_reuse(mbufmgr[i]);
+   mbatch[i] = intel_batchbuffer_alloc(mbufmgr[i], 
devid);
+   igt_assert(mbufmgr[i]);
+   mbuffer[i] = intel_bo_gem_create_from_name(
+   mbufmgr[i],
+   buffer_name,
+   target_flink);
+   igt_assert(mbuffer[i]);
+   }
+   }
}
 
igt_subtest(render) {
@@ -190,8 +271,27 @@ igt_main
printf(dummy loop run on random rings completed\n);
}
}
-
+   igt_subtest(mixed_multi_fd

[Intel-gfx] [PATCH I-g-t V2 0/2] Tests: Add test cases based on multi drm_fd to test sync

2014-04-14 Thread Zhao Yakui
This follows Daniel's advice to add the two test cases based on multi drm_fd to
test the ring sync and CPU-GPU sync.
The Broadwell GT3 machine has two independent BSD rings that can be used
to process the video commands. This is implemented in the kernel driver and is
transparent to user-space. But we still need to check the ring sync and
CPU-GPU sync for the second BSD ring. Two tests are created based on multiple
drm_fds to test the sync. Using multiple drm_fds ensures that the second BSD
ring has the opportunity to dispatch GPU commands.

V1->V2: Follow Daniel's comment to add one subtest instead of one individual
test case, which is used to test the CPU-GPU sync under multi BSD rings.

Zhao Yakui (2):
  tests: Add one ring sync case based on multi drm_fd to test ring
semaphore sync under multi BSD rings
  tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to
test CPU-GPU sync under multi BSD rings

 tests/Makefile.sources  |1 +
 tests/gem_dummy_reloc_loop.c|  102 ++-
 tests/gem_multi_bsd_sync_loop.c |  172 +++
 3 files changed, 274 insertions(+), 1 deletion(-)
 create mode 100644 tests/gem_multi_bsd_sync_loop.c

-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH I-g-t V2 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings

2014-04-14 Thread Zhao Yakui
The Broadwell GT3 machine has two independent BSD rings in kernel driver while
it is transparent to the user-space driver. In such case it needs to check
the ring sync between the two BSD rings. At the same time it also needs to
check the sync among the second BSD ring and the other rings.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 tests/Makefile.sources  |1 +
 tests/gem_multi_bsd_sync_loop.c |  172 +++
 2 files changed, 173 insertions(+)
 create mode 100644 tests/gem_multi_bsd_sync_loop.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index c957ace..7cd9ca8 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -105,6 +105,7 @@ TESTS_progs = \
gem_render_tiled_blits \
gem_ring_sync_copy \
gem_ring_sync_loop \
+   gem_multi_bsd_sync_loop \
gem_seqno_wrap \
gem_set_tiling_vs_gtt \
gem_set_tiling_vs_pwrite \
diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c
new file mode 100644
index 000..7f5b832
--- /dev/null
+++ b/tests/gem_multi_bsd_sync_loop.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_ring_sync_loop_*.c)
+ *Zhao Yakui yakui.z...@intel.com
+ *
+ */
+
+#include stdlib.h
+#include stdio.h
+#include string.h
+#include fcntl.h
+#include inttypes.h
+#include errno.h
+#include sys/stat.h
+#include sys/time.h
+#include drm.h
+#include ioctl_wrappers.h
+#include drmtest.h
+#include intel_bufmgr.h
+#include intel_batchbuffer.h
+#include intel_io.h
+#include i830_reg.h
+#include intel_chipset.h
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static drm_intel_bo *target_buffer;
+
+#define NUM_FD 50
+
+static int mfd[NUM_FD];
+static drm_intel_bufmgr *mbufmgr[NUM_FD];
+static struct intel_batchbuffer *mbatch[NUM_FD];
+static drm_intel_bo *mbuffer[NUM_FD];
+
+
+/*
+ * Testcase: Basic check of ring-ring sync using a dummy reloc
+ *
+ * Extremely efficient at catching missed irqs with semaphores=0 ...
+ */
+
+#define MI_COND_BATCH_BUFFER_END   (0x3623 | 1)
+#define MI_DO_COMPARE  (121)
+
+static void
+store_dword_loop(int fd)
+{
+   int i;
+   int num_rings = gem_get_num_rings(fd);
+
+   srandom(0xdeadbeef);
+
+   for (i = 0; i  SLOW_QUICK(0x10, 10); i++) {
+   int ring, mindex;
+   ring = random() % num_rings + 1;
+   mindex = random() % NUM_FD;
+   batch = mbatch[mindex];
+   if (ring == I915_EXEC_RENDER) {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+   OUT_BATCH(0x); /* compare dword */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP);
+   ADVANCE_BATCH();
+   } else {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_FLUSH_DW | 1);
+   OUT_BATCH(0); /* reserved */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP | (122) | (0xf));
+   ADVANCE_BATCH();
+   }
+   intel_batchbuffer_flush_on_ring(batch, ring);
+   }
+
+   drm_intel_bo_map(target_buffer, 0);
+   // map to force waiting on rendering
+   drm_intel_bo_unmap(target_buffer);
+}
+
+igt_simple_main
+{
+   int fd;
+   int devid;
+   int i;
+
+   fd = drm_open_any();
+   devid = intel_get_drm_devid(fd);
+   gem_require_ring(fd, I915_EXEC_BLT

Re: [Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3

2014-04-13 Thread Zhao Yakui
On Fri, 2014-04-11 at 02:57 -0600, Daniel Vetter wrote:
 On Fri, Apr 11, 2014 at 08:56:28AM +0800, Zhao Yakui wrote:
  On Thu, 2014-04-10 at 03:04 -0600, Daniel Vetter wrote:
   On Thu, Apr 10, 2014 at 04:28:34PM +0800, Zhao Yakui wrote:
BTW: Does it need to check all the flags defined in i915_drm.h or the
exported flag returned by i915_get_parameter?
   
   I don't have i915_get_parameter anywhere in my sources, so no idea what
   you mean ...
  
  Sorry that the function should be i915_getparam. It is called by the
  I915_GETPARAM ioctl to query the flag supported by the driver.
 
 Ah, now I understand. The idea is to test all fields of the structure
 exhaustively (so also rsvd to make sure it's 0). Well except for the
 buffer count field since we have tests for that already.
 
 For the reasons see my two blog posts on the topic:
 
 http://blog.ffwll.ch/2013/11/testing-requirements-for-drmi915.html
 
 http://blog.ffwll.ch/2013/11/botching-up-ioctls.html

OK. It seems that the test case needs to check more fields than just the
exported flags.
I will read your blog posts to understand how to write the test
case.

Thanks.
Yakui

 
 Cheers, Daniel


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH I-g-t 2/2] tests: Add dummy_reloc test case based on multi drm_fd to test CPU-GPU sync under multi BSD rings

2014-04-13 Thread Zhao Yakui
The Broadwell GT3 machine has two independent BSD rings in kernel driver while
it is transparent to the user-space driver. In such case it needs to check
the CPU-GPU sync for the second BSD ring. Multi drm_fd can assure that the
second BSD ring has the opportunity to dispatch the GPU command.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 tests/Makefile.sources|1 +
 tests/gem_dummy_reloc_multi_bsd.c |  258 +
 2 files changed, 259 insertions(+)
 create mode 100644 tests/gem_dummy_reloc_multi_bsd.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 254a5c5..98f277f 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -105,6 +105,7 @@ TESTS_progs = \
gem_ring_sync_copy \
gem_ring_sync_loop \
gem_multi_bsd_sync_loop \
+   gem_dummy_reloc_multi_bsd \
gem_seqno_wrap \
gem_set_tiling_vs_gtt \
gem_set_tiling_vs_pwrite \
diff --git a/tests/gem_dummy_reloc_multi_bsd.c 
b/tests/gem_dummy_reloc_multi_bsd.c
new file mode 100644
index 000..ef8213e
--- /dev/null
+++ b/tests/gem_dummy_reloc_multi_bsd.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_dummy_reloc_loop*.c)
+ *Zhao Yakui yakui.z...@intel.com
+ *
+ */
+
+#include stdlib.h
+#include stdio.h
+#include string.h
+#include fcntl.h
+#include inttypes.h
+#include errno.h
+#include sys/stat.h
+#include sys/time.h
+#include drm.h
+#include ioctl_wrappers.h
+#include drmtest.h
+#include intel_bufmgr.h
+#include intel_batchbuffer.h
+#include intel_io.h
+#include i830_reg.h
+#include intel_chipset.h
+
+#define LOCAL_I915_EXEC_VEBOX (40)
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static drm_intel_bo *target_buffer;
+
+#define NUM_FD 50
+
+static int mfd[NUM_FD];
+static drm_intel_bufmgr *mbufmgr[NUM_FD];
+static struct intel_batchbuffer *mbatch[NUM_FD];
+static drm_intel_bo *mbuffer[NUM_FD];
+
+
+/*
+ * Testcase: Basic check of ring-cpu sync using a dummy reloc under multi-fd
+ *
+ * The last test (that randomly switches the ring) seems to be pretty effective
+ * at hitting the missed irq bug that's worked around with the HWSTAM irq 
write.
+ */
+
+
+#define MI_COND_BATCH_BUFFER_END   (0x3623 | 1)
+#define MI_DO_COMPARE  (121)
+static void
+dummy_reloc_loop(int ring)
+{
+   int i;
+   srandom(0xdeadbeef);
+
+   for (i = 0; i  0x10; i++) {
+   int mindex = random() % NUM_FD;
+
+   batch = mbatch[mindex];
+   if (ring == I915_EXEC_RENDER) {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+   OUT_BATCH(0x); /* compare dword */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP);
+   ADVANCE_BATCH();
+   } else {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_FLUSH_DW | 1);
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(0); /* reserved */
+   OUT_BATCH(MI_NOOP | (122) | (0xf));
+   ADVANCE_BATCH();
+   }
+   intel_batchbuffer_flush_on_ring(batch, ring);
+
+   drm_intel_bo_map(target_buffer, 0);
+   // map to force completion
+   drm_intel_bo_unmap(target_buffer);
+   }
+}
+
+static void
+dummy_reloc_loop_random_ring(int num_rings)
+{
+   int i;
+
+   srandom(0xdeadbeef);
+
+   for (i = 0; i  0x10; i

[Intel-gfx] [PATCH I-g-t 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync

2014-04-13 Thread Zhao Yakui
The Broadwell GT3 machine has two independent BSD rings in the kernel driver,
while this is transparent to the user-space driver. In such a case the ring sync
between the two BSD rings needs to be checked. At the same time the sync between
the second BSD ring and the other rings also needs to be checked. Using multiple
drm_fds ensures that the second BSD ring has the opportunity to dispatch GPU
commands.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 tests/Makefile.sources  |1 +
 tests/gem_multi_bsd_sync_loop.c |  172 +++
 2 files changed, 173 insertions(+)
 create mode 100644 tests/gem_multi_bsd_sync_loop.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index bf02a48..254a5c5 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -104,6 +104,7 @@ TESTS_progs = \
gem_render_tiled_blits \
gem_ring_sync_copy \
gem_ring_sync_loop \
+   gem_multi_bsd_sync_loop \
gem_seqno_wrap \
gem_set_tiling_vs_gtt \
gem_set_tiling_vs_pwrite \
diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c
new file mode 100644
index 000..7f5b832
--- /dev/null
+++ b/tests/gem_multi_bsd_sync_loop.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_ring_sync_loop_*.c)
+ *Zhao Yakui yakui.z...@intel.com
+ *
+ */
+
+#include stdlib.h
+#include stdio.h
+#include string.h
+#include fcntl.h
+#include inttypes.h
+#include errno.h
+#include sys/stat.h
+#include sys/time.h
+#include drm.h
+#include ioctl_wrappers.h
+#include drmtest.h
+#include intel_bufmgr.h
+#include intel_batchbuffer.h
+#include intel_io.h
+#include i830_reg.h
+#include intel_chipset.h
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static drm_intel_bo *target_buffer;
+
+#define NUM_FD 50
+
+static int mfd[NUM_FD];
+static drm_intel_bufmgr *mbufmgr[NUM_FD];
+static struct intel_batchbuffer *mbatch[NUM_FD];
+static drm_intel_bo *mbuffer[NUM_FD];
+
+
+/*
+ * Testcase: Basic check of ring-ring sync using a dummy reloc
+ *
+ * Extremely efficient at catching missed irqs with semaphores=0 ...
+ */
+
+#define MI_COND_BATCH_BUFFER_END   (0x3623 | 1)
+#define MI_DO_COMPARE  (121)
+
+static void
+store_dword_loop(int fd)
+{
+   int i;
+   int num_rings = gem_get_num_rings(fd);
+
+   srandom(0xdeadbeef);
+
+   for (i = 0; i  SLOW_QUICK(0x10, 10); i++) {
+   int ring, mindex;
+   ring = random() % num_rings + 1;
+   mindex = random() % NUM_FD;
+   batch = mbatch[mindex];
+   if (ring == I915_EXEC_RENDER) {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+   OUT_BATCH(0x); /* compare dword */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP);
+   ADVANCE_BATCH();
+   } else {
+   BEGIN_BATCH(4);
+   OUT_BATCH(MI_FLUSH_DW | 1);
+   OUT_BATCH(0); /* reserved */
+   OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_RENDER, 0);
+   OUT_BATCH(MI_NOOP | (122) | (0xf));
+   ADVANCE_BATCH();
+   }
+   intel_batchbuffer_flush_on_ring(batch, ring);
+   }
+
+   drm_intel_bo_map(target_buffer, 0);
+   // map to force waiting on rendering
+   drm_intel_bo_unmap(target_buffer);
+}
+
+igt_simple_main
+{
+   int fd;
+   int devid;
+   int i;
+
+   fd

[Intel-gfx] [PATCH V2 3/6] drm/i915:Handle the irq interrupt for the second BSD ring

2014-04-13 Thread Zhao Yakui
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_irq.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7a4d3ae..63bd5de 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1347,13 +1347,16 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
DRM_ERROR(The master control interrupt lied (GT0)!\n);
}
 
-   if (master_ctl  GEN8_GT_VCS1_IRQ) {
+   if (master_ctl  (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
tmp = I915_READ(GEN8_GT_IIR(1));
if (tmp) {
ret = IRQ_HANDLED;
vcs = tmp  GEN8_VCS1_IRQ_SHIFT;
if (vcs  GT_RENDER_USER_INTERRUPT)
notify_ring(dev, dev_priv-ring[VCS]);
+   vcs = tmp  GEN8_VCS2_IRQ_SHIFT;
+   if (vcs  GT_RENDER_USER_INTERRUPT)
+   notify_ring(dev, dev_priv-ring[VCS2]);
I915_WRITE(GEN8_GT_IIR(1), tmp);
} else
DRM_ERROR(The master control interrupt lied (GT1)!\n);
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V2 2/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine

2014-04-13 Thread Zhao Yakui
Based on the hardware spec, the BDW GT3 machine has two independent
BSD rings that can be used to dispatch the video commands.
So just initialize it.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |4 +--
 drivers/gpu/drm/i915/i915_drv.h |2 ++
 drivers/gpu/drm/i915/i915_gem.c |9 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |1 +
 drivers/gpu/drm/i915/i915_reg.h |1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   54 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++-
 7 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 17fbbe5..2a7842b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -282,7 +282,7 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
 static const struct intel_device_info intel_broadwell_gt3d_info = {
.gen = 8, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
@@ -292,7 +292,7 @@ static const struct intel_device_info 
intel_broadwell_gt3d_info = {
 static const struct intel_device_info intel_broadwell_gt3m_info = {
.gen = 8, .is_mobile = 1, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 761fc53..ac5598c3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1827,7 +1827,9 @@ struct drm_i915_cmd_table {
 #define BSD_RING   (1VCS)
 #define BLT_RING   (1BCS)
 #define VEBOX_RING (1VECS)
+#define BSD2_RING  (1VCS2)
 #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask  BSD_RING)
+#define HAS_BSD2(dev)  (INTEL_INFO(dev)-ring_mask  BSD2_RING)
 #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask  BLT_RING)
 #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask  VEBOX_RING)
 #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85c9cf0..b4dcf2a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_blt_ring;
}
 
+   if (HAS_BSD2(dev)) {
+   ret = intel_init_bsd2_ring_buffer(dev);
+   if (ret)
+   goto cleanup_vebox_ring;
+   }
 
ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
if (ret)
-   goto cleanup_vebox_ring;
+   goto cleanup_ring;
 
return 0;
 
+cleanup_ring:
+   intel_cleanup_ring_buffer(dev_priv-ring[VCS2]);
 cleanup_vebox_ring:
intel_cleanup_ring_buffer(dev_priv-ring[VECS]);
 cleanup_blt_ring:
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4865ade..3cab7f9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -42,6 +42,7 @@ static const char *ring_str(int ring)
case VCS: return bsd;
case BCS: return blt;
case VECS: return vebox;
+   case VCS2: return second bsd;
default: return ;
}
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8f84555..0b88508 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -760,6 +760,7 @@ enum punit_power_well {
 #define RENDER_RING_BASE   0x02000
 #define BSD_RING_BASE  0x04000
 #define GEN6_BSD_RING_BASE 0x12000
+#define GEN8_BSD2_RING_BASE0x1c000
 #define VEBOX_RING_BASE0x1a000
 #define BLT_RING_BASE  0x22000
 #define RING_TAIL(base)((base)+0x30)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eb3dd26..8b9b89080 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1924,10 +1924,12 @@ int intel_init_render_ring_buffer(struct drm_device 
*dev)
ring-semaphore_register[VCS] = MI_SEMAPHORE_SYNC_RV;
ring-semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB;
ring-semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE;
+   ring-semaphore_register[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
ring-signal_mbox[RCS] = GEN6_NOSYNC;
ring-signal_mbox[VCS] = GEN6_VRSYNC

[Intel-gfx] [PATCH V2 4/6] drm/i915: Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning

2014-04-13 Thread Zhao Yakui
V1-V2: Follow Daniel's comment to update the comment

Gen7 doesn't have the second BSD ring, but the compiler will emit a switch check
warning during compilation if VCS2 is not handled. So just add the VCS2 case to
remove the switch check warning.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8b9b89080..2c89525 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -988,6 +988,11 @@ void intel_ring_setup_status_page(struct intel_ring_buffer 
*ring)
case BCS:
mmio = BLT_HWS_PGA_GEN7;
break;
+   /*
+* VCS2 actually doesn't exist on Gen7. Only shut up
+* gcc switch check warning
+*/
+   case VCS2:
case VCS:
mmio = BSD_HWS_PGA_GEN7;
break;
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V2 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3

2014-04-13 Thread Zhao Yakui
V1-V2: Follow Daniel's comment to consider the stolen check for BDW in
kernel/early-quirks.c

Based on the hardware spec, the BDW GT3 has a different configuration
from the BDW GT1/GT2. So split the BDW device info definition.
This prepares for adding the dual BSD rings on the BDW GT3 machine.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |   26 --
 include/drm/i915_pciids.h   |   22 +-
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5d8250f..17fbbe5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -279,6 +279,26 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
GEN_DEFAULT_PIPEOFFSETS,
 };
 
+static const struct intel_device_info intel_broadwell_gt3d_info = {
+   .gen = 8, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
+static const struct intel_device_info intel_broadwell_gt3m_info = {
+   .gen = 8, .is_mobile = 1, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
@@ -311,8 +331,10 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
INTEL_HSW_M_IDS(intel_haswell_m_info), \
INTEL_VLV_M_IDS(intel_valleyview_m_info),  \
INTEL_VLV_D_IDS(intel_valleyview_d_info),  \
-   INTEL_BDW_M_IDS(intel_broadwell_m_info),   \
-   INTEL_BDW_D_IDS(intel_broadwell_d_info)
+   INTEL_BDW_GT12M_IDS(intel_broadwell_m_info),   \
+   INTEL_BDW_GT12D_IDS(intel_broadwell_d_info),   \
+   INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \
+   INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info)
 
 static const struct pci_device_id pciidlist[] = {  /* aka */
INTEL_PCI_IDS,
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 940ece4..24f3cad 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -223,14 +223,26 @@
_INTEL_BDW_D(gt, 0x160A, info), /* Server */ \
_INTEL_BDW_D(gt, 0x160D, info) /* Workstation */
 
-#define INTEL_BDW_M_IDS(info) \
+#define INTEL_BDW_GT12M_IDS(info) \
_INTEL_BDW_M_IDS(1, info), \
-   _INTEL_BDW_M_IDS(2, info), \
-   _INTEL_BDW_M_IDS(3, info)
+   _INTEL_BDW_M_IDS(2, info)
 
-#define INTEL_BDW_D_IDS(info) \
+#define INTEL_BDW_GT12D_IDS(info) \
_INTEL_BDW_D_IDS(1, info), \
-   _INTEL_BDW_D_IDS(2, info), \
+   _INTEL_BDW_D_IDS(2, info)
+
+#define INTEL_BDW_GT3M_IDS(info) \
+   _INTEL_BDW_M_IDS(3, info)
+
+#define INTEL_BDW_GT3D_IDS(info) \
_INTEL_BDW_D_IDS(3, info)
 
+#define INTEL_BDW_M_IDS(info) \
+   INTEL_BDW_GT12M_IDS(info), \
+   INTEL_BDW_GT3M_IDS(info)
+
+#define INTEL_BDW_D_IDS(info) \
+   INTEL_BDW_GT12D_IDS(info), \
+   INTEL_BDW_GT3D_IDS(info)
+
 #endif /* _I915_PCIIDS_H */
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-13 Thread Zhao Yakui
V1-V2: Follow Daniel's comment and use the simple ping-pong mechanism.
This is only to add the support of dual BSD rings on BDW GT3 machine.
The further optimization will be considered in another patch set.

The BDW GT3 has two independent BSD rings, which can be used to process the
video commands. To be simpler, it is transparent to user-space driver/middleware.
Instead the kernel driver will decide which ring is to dispatch the BSD video
command.

As every BSD ring is powerful, it is enough to dispatch the BSD video command
based on the drm fd. In such case it can play back video stream while encoding
another video stream. The coarse ping-pong mechanism is used to determine
which BSD ring is used to dispatch the BSD video command.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c|3 +++
 drivers/gpu/drm/i915/i915_drv.h|3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   37 +++-
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0b38f88..4d27cf4 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
long flags)
spin_lock_init(dev_priv-backlight_lock);
spin_lock_init(dev_priv-uncore.lock);
spin_lock_init(dev_priv-mm.object_stat_lock);
+   atomic_set(dev_priv-bsd_cmd_counter, 0);
mutex_init(dev_priv-dpio_lock);
mutex_init(dev_priv-modeset_restore_lock);
 
@@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct 
drm_file *file)
 {
struct drm_i915_file_private *file_priv = file-driver_priv;
 
+   if (file_priv  file_priv-bsd_ring)
+   file_priv-bsd_ring = NULL;
kfree(file_priv);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ac5598c3..68e8166 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1466,6 +1466,8 @@ struct drm_i915_private {
struct i915_dri1_state dri1;
/* Old ums support infrastructure, same warning applies. */
struct i915_ums_state ums;
+   /* the lock for dispatch video commands on two BSD rings */
+   atomic_t bsd_cmd_counter;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -1673,6 +1675,7 @@ struct drm_i915_file_private {
 
struct i915_hw_context *private_default_ctx;
atomic_t rps_wait_boost;
+   struct  intel_ring_buffer *bsd_ring;
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 341ec68..720ef17 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -999,6 +999,34 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
 }
 
+/**
+ * Find one BSD ring to dispatch the corresponding BSD command.
+ * The Ring ID is returned.
+ */
+static int gen8_dispatch_bsd_ring(struct drm_device *dev,
+ struct drm_file *file)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_i915_file_private *file_priv = file-driver_priv;
+
+   /* Check whether the file_priv is using one ring */
+   if (file_priv-bsd_ring)
+   return file_priv-bsd_ring-id;
+   else {
+   /* If no, use the ping-pong mechanism to select one ring */
+   int counter, ring_id;
+   smp_mb__before_atomic_inc();
+   counter = atomic_inc_return(dev_priv-bsd_cmd_counter);
+   if (counter % 2 == 0)
+   ring_id = VCS;
+   else
+   ring_id = VCS2;
+
+   file_priv-bsd_ring = dev_priv-ring[ring_id];
+   return ring_id;
+   }
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
   struct drm_file *file,
@@ -1043,7 +1071,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
 
if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
ring = dev_priv-ring[RCS];
-   else
+   else if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
+   if (HAS_BSD2(dev)) {
+   int ring_id;
+   ring_id = gen8_dispatch_bsd_ring(dev, file);
+   ring = dev_priv-ring[ring_id];
+   } else
+   ring = dev_priv-ring[VCS];
+   } else
ring = dev_priv-ring[(args-flags  I915_EXEC_RING_MASK) - 1];
 
if (!intel_ring_initialized(ring)) {
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V2 0/6] drm/i915: Add the support of dual BSD rings on BDW GT3

2014-04-13 Thread Zhao Yakui
V1-V2: Follow Daniel's comment to do the following update:
   a. consider the stolen check for BDW in kernel/early-quirks.c in patch 01
   b. update the comment in Patch 04
   c. use the simple ping-pong mechanism to add the support of dual BSD rings.
The further optimization will be considered in another patch set.


This is the patch set that tries to add the support of dual BSD rings on BDW
GT3. Based on hardware spec, the BDW GT3 has two independent BSD rings, which
can be used to process the video commands. To be simpler, it is transparent 
to user-space driver/middleware. In such case the kernel driver will decide
which ring is to dispatch the BSD video command.

As every BSD ring is powerful, it is enough to dispatch the BSD video command
based on the drm fd. In such a case different BSD rings are used for video
playback and encoding.

Zhao Yakui (6):
  drm/i915: Split the BDW device definition to prepare for dual BSD
rings on BDW GT3
  drm/i915:Initialize the second BSD ring on BDW GT3 machine
  drm/i915:Handle the irq interrupt for the second BSD ring
  drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to
remove the switch check warning
  drm/i915:Update the restrict check to filter out wrong Ring ID
passed by user-space
  drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the
BSD command on BDW GT3

 drivers/gpu/drm/i915/i915_dma.c|3 ++
 drivers/gpu/drm/i915/i915_drv.c|   26 +++-
 drivers/gpu/drm/i915/i915_drv.h|5 +++
 drivers/gpu/drm/i915/i915_gem.c|9 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   39 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |1 +
 drivers/gpu/drm/i915/i915_irq.c|5 ++-
 drivers/gpu/drm/i915/i915_reg.h|1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c|   59 
 drivers/gpu/drm/i915/intel_ringbuffer.h|5 ++-
 include/drm/i915_pciids.h  |   22 ---
 11 files changed, 163 insertions(+), 12 deletions(-)

-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V2 5/6] drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space

2014-04-13 Thread Zhao Yakui
One extra ring is added in the kernel driver but it is transparent to the
user-space application/middleware. In such case the number of the rings
in kernel driver is bigger than that exported to the user-space. So
it needs to filter out the wrong Ring ID passed by user-space.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h|1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3491402..341ec68 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1035,7 +1035,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (args-flags  I915_EXEC_IS_PINNED)
flags |= I915_DISPATCH_PINNED;
 
-   if ((args-flags  I915_EXEC_RING_MASK)  I915_NUM_RINGS) {
+   if ((args-flags  I915_EXEC_RING_MASK)  LAST_USER_RING) {
DRM_DEBUG(execbuf with unknown ring: %d\n,
  (int)(args-flags  I915_EXEC_RING_MASK));
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8ca4285..59f4cdd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -64,6 +64,7 @@ struct  intel_ring_buffer {
VCS2,
} id;
 #define I915_NUM_RINGS 5
+#define LAST_USER_RING (VECS + 1)
u32 mmio_base;
void__iomem *virtual_start;
struct  drm_device *dev;
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 5/5] drm/i915:Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-10 Thread Zhao Yakui
On Thu, 2014-04-10 at 00:48 -0600, Daniel Vetter wrote:
 On Thu, Apr 10, 2014 at 10:24:53AM +0800, Zhao Yakui wrote:
  On Wed, 2014-04-09 at 08:34 -0600, Daniel Vetter wrote:
   On Wed, Apr 09, 2014 at 09:59:56AM +0800, Zhao Yakui wrote:
The BDW GT3 has two independent BSD rings, which can be used to process 
the
video commands. To be simpler, it is transparent to user-space 
driver/middleware.
Instead the kernel driver will decide which ring is to dispatch the BSD 
video
command.

As every BSD ring is powerful, it is enough to dispatch the BSD video 
command
based on the drm fd. In such case the different BSD ring is used for 
video playing
back and encoding. At the same time the coarse dispatch mechanism can 
help to avoid
the object synchronization between the BSD rings.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
   
   This looks way too complicated. First things first please get rid of the
   atomic_t usage. If you don't have _massive_ comments explaining the memory
   barriers you're most likely using linux kernel atomic_t wrong. They are
   fully unordered.
  
  Thanks for the review.
  
  For the atomic_t usage:  I will remove it in next version as the counter
  is already protected by the lock.  
  
   
   With that out of the way this still looks a bit complicated really. Can't
   we just use a very simple static rule in gen8_dispatch_bsd_ring which
   hashed the pointer address of the file_priv? Just to get things going,
   once we have a clear need we can try to make things more intelligent. But
   in case of doubt I really prefer if we start with the dumbest possible
   approach first and add complexity instead of starting with something
   really complex and simplifying it.
  
  Do you mean that file_priv is hashed and then is mapped to BSD 0 or 1
  ring?  
 
 Yeah, that's the idea. Get in the basic support first, make it fancy like
 you describe below second. This has a few upsides:
 - We can concentrate on validating basic support in the first round
   instead of potentially fighting a bug in the load balancer.
 - Discussions and performance testing for the load balancer won't hold up
   the entire feature.
 - Like I've said this might not be required. Before we add more complexity
   than just hashing the file_priv I want to see some benchmarks of
   expected workloads that show that the load balancing is indeed a good
   idea - for the case of a transcode server I guess we should have
   sufficient in-flight operations that it won't really matter. Or at least
   I hope so.
 

OK. Understand your concerns. I can split it into two steps. One is to add
the basic support. The second step is for the optimization.

But I don't think that the hash of file_priv is a good idea. As it only
has two rings, it is possible that the hash value is always mapped to
BSD ring 0.  In such case when multiple video clips are played back,
the performance can't meet with the requirement.(For example: User can
play back 4 1080p video clips concurrently when only one BSD ring is
used. On the BDW GT3, they hope to play back 8 1080p video clips
concurrently. The poor hash design will cause that all the workload are
mapped to one BSD ring and then it can't meet with the requirement).

How about using the ping-pong mechanism for the file_priv? For one new
fd, it will use BSD ring 0 and then next file_priv will use BSD ring 1.
Then BSD ring 0, then BSD ring 1, and so on alternately.

Does this make sense to you?


 So maybe split this patch up into the first step with the basic file_priv
 hashing mapping and the 2nd patch to add the improved algo?
 
 Cheers, Daniel
 
  The GT3 machine has two independent BSD rings. It will be better that
  the kernel driver can balance the video workload between the two rings. 
  When using the hashed file_priv to select BSD ring, the video balance
  depends on the design of the hash. Under some scenarios, it will be
  possible that one ring is very busy while another ring is very idle. And
  then performance of video playing back/encoding will be affected.
  At the same time the hash mechanism is only used to select the
  corresponding BSD ring when one drm_fd is opened. And it doesn't
  consider the video workload balance after finishing some workloads.
  
  The following is the basic idea in my patch.(A counter variable is added
  for ring. The bigger the counter, the higher the workload).
 a. When one new fd needs to dispatch the BSD video command, it will
  select the ring with the lowest workload(lowest counter). And then
  counter in this ring will be added.
 b. when the drm fd is closed(the workload is finished), the counter
  of the ring used by file_priv will be decreased. 
 c. When the drm fd already selects one BSD ring in previously
  submitted command, it will check whether it is using the ring with the
  lowest workload(lowest counter). If not, it can be switched. The purpose
  is to assure that the workload

Re: [Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3

2014-04-10 Thread Zhao Yakui
On Thu, 2014-04-10 at 00:58 -0600, Daniel Vetter wrote:
 On Thu, Apr 10, 2014 at 11:28:46AM +0800, Zhao Yakui wrote:
  On Wed, 2014-04-09 at 08:45 -0600, Daniel Vetter wrote:
   On Wed, Apr 09, 2014 at 09:59:51AM +0800, Zhao Yakui wrote:

This is the patch set that tries to add the support of dual BSD rings 
on BDW
GT3. Based on hardware spec, the BDW GT3 has two independent BSD rings, 
which
can be used to process the video commands. To be simpler, it is 
transparent 
to user-space driver/middleware. In such case the kernel driver will 
decide
which ring is to dispatch the BSD video command.

As every BSD ring is powerful, it is enough to dispatch the BSD video 
command
based on the drm fd. In such case the different BSD ring is used for 
video playing
back and encoding. At the same time the coarse dispatch mechanism can 
help to avoid
the object synchronization between the BSD rings.
   
   Ok, I've quickly read through it all and commented on a few things. Imo
   the last patch should be massively simplified, at least for the first
   round. Other things look small.
   
  Hi, Daniel
  
  Thanks for your review.
  
   What's still missing are testcases, and I have two things in mind here:
   - Exercise the 2nd ring dispatch and sync a bit. Since the 2nd bsd ring is
 hidden within the kernel I think the right approach would be to open a
 few drm fds (10 or so) and then randomly use them with a dummy reloc. We
 have two testcases which can be used as blueprints that need
 adjustment:
   
 - gem_ring_sync_loop: Probably easiest to copy it to a new file as
   gem_multi_bsd_sync_loop. This test exercises semaphores.
 - gem_dummy_reloc_loop, subtest mixed: Almost the same as the above, but
   the sync is done _inside_ the loop and hence this exercises gpu/cpu
   sync. We need both tests adjusted, for this we need a new
   multi-bsd test.
  
  Agree with your concerns. I will try to add the
  gem_multi_bsd_sync_loop/dummy_reloc_loop test case so that it can test
  the sync with multi-BSD.
  
  BTW: How about if I directly add multiple fds in gem_ring_sync_loop test
  case and then test the sync among the different rings?  In such case the
  user-application doesn't need to know the existence of multi-BSD rings.
 
 We don't need it for the other rings, so I think it's better to leave the
 existing tests as-is to avoid introducing bugs. Testing testcase is always
 fairly hard, since you have to break your kernel to make sure the test
 still catches bugs ;-)
 
 Also for testing VCS1 and VCS2 we need to have multiple fd using the
 _same_ logical ring exposed to userspace, so the test logic will look a
 bit different anyway.

OK. I will add the separated two test cases for it.

 
   - New testcase to fully test main execbuffer flags. This is simply
 something that we don't yet have. The next guy to touch execbuf code
 needs to add it, and it looks like that's you ;-) I've done a JIRA task
 for the resource streamer work, but I think the resource streamer wont
 be merged anytime soon. So I'll reassign to you. Jira task is VIZ-3129.
  
  For the new testcase of execbuffer flag:  Do you have any idea about
  which kind of exec flag needs to be checked? Do you have any idea about
  the expected failure/success behaviour for the flags?
 For example: I915_EXEC_PINNED : If one object is not pinned and
  submitted, what behaviour is expected? Fail or wrong?
 
 I've clarified the JIRA, the test is just for the flags/values in the main
 execbuf structure. And the idea is to do the basic api sanity checking as
 outlined in my blog post
 
 http://blog.ffwll.ch/2013/11/botching-up-ioctls.html
 
 i.e. go through all fields in struct drm_i915_gem_execbuffer2 and write a
 test which checks that the kernel correctly rejects invalid input data. So
 e.g. for pointer you can supply NULL or a pointer to invalid memory,
 buffer count is already checked with the overflow tests, but also invalid
 flags and also making sure that if reserved fields aren't 0 the kernel
 rejects the batch.
 
 Of course to be able to check this you first need to construct a valid
 no-op batch (e.g. copy from gem_exec_nop.c) and submit it (to make sure no
 one breaks the test later on). Then each subtest only changes the relevant
 field to make sure the kernel really did check the field (and not just
 returned -EINVAL due to something else).
 
 Some execbuf fields are special and e.g. contexts are not valid when
 there's no hw context support.
 
 If you want to look for examples check out the basic api tests for
 recently added ioctls like in gem_reset_stats.c. Execbuffer ioctl is
 simply a bit more complex.

OK. I will take a look at your blog and understand what you mentioned.

BTW: Does it need to check all the flags defined in i915_drm.h or the
exported flag returned by i915_get_parameter?

Thanks.
Yakui

 
 Cheers

Re: [Intel-gfx] [PATCH 5/5] drm/i915:Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-10 Thread Zhao Yakui
On Thu, 2014-04-10 at 03:03 -0600, Daniel Vetter wrote:
 On Thu, Apr 10, 2014 at 04:04:22PM +0800, Zhao Yakui wrote:
  On Thu, 2014-04-10 at 00:48 -0600, Daniel Vetter wrote:
   On Thu, Apr 10, 2014 at 10:24:53AM +0800, Zhao Yakui wrote:
On Wed, 2014-04-09 at 08:34 -0600, Daniel Vetter wrote:
 On Wed, Apr 09, 2014 at 09:59:56AM +0800, Zhao Yakui wrote:
  The BDW GT3 has two independent BSD rings, which can be used to 
  process the
  video commands. To be simpler, it is transparent to user-space 
  driver/middleware.
  Instead the kernel driver will decide which ring is to dispatch the 
  BSD video
  command.
  
  As every BSD ring is powerful, it is enough to dispatch the BSD 
  video command
  based on the drm fd. In such case the different BSD ring is used 
  for video playing
  back and encoding. At the same time the coarse dispatch mechanism 
  can help to avoid
  the object synchronization between the BSD rings.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
 
 This looks way too complicated. First things first please get rid of 
 the
 atomic_t usage. If you don't have _massive_ comments explaining the 
 memory
 barriers you're most likely using linux kernel atomic_t wrong. They 
 are
 fully unordered.

Thanks for the review.

For the atomic_t usage:  I will remove it in next version as the counter
is already protected by the lock.  

 
 With that out of the way this still looks a bit complicated really. 
 Can't
 we just use a very simple static rule in gen8_dispatch_bsd_ring which
 hashed the pointer address of the file_priv? Just to get things going,
 once we have a clear need we can try to make things more intelligent. 
 But
 in case of doubt I really prefer if we start with the dumbest possible
 approach first and add complexity instead of starting with something
 really complex and simplifying it.

Do you mean that file_priv is hashed and then is mapped to BSD 0 or 1
ring?  
   
   Yeah, that's the idea. Get in the basic support first, make it fancy like
   you describe below second. This has a few upsides:
   - We can concentrate on validating basic support in the first round
 instead of potentially fighting a bug in the load balancer.
   - Discussions and performance testing for the load balancer won't hold up
 the entire feature.
   - Like I've said this might not be required. Before we add more complexity
 than just hashing the file_priv I want to see some benchmarks of
 expected workloads that show that the load balancing is indeed a good
 idea - for the case of a transcode server I guess we should have
 sufficient in-flight operations that it won't really matter. Or at least
 I hope so.
   
  
  OK. Understand your concerns. I can split it into two steps. One is to add
  the basic support. The second step is for the optimization.
  
  But I don't think that the hash of file_priv is a good idea. As it only
  has two rings, it is possible that the hash value is always mapped to
  BSD ring 0.  In such case when multiple video clips are played back,
  the performance can't meet with the requirement.(For example: User can
  play back 4 1080p video clips concurrently when only one BSD ring is
  used. On the BDW GT3, they hope to play back 8 1080p video clips
  concurrently. The poor hash design will cause that all the workload are
  mapped to one BSD ring and then it can't meet with the requirement).
  
  How about using the ping-pong mechanism for the file_priv? For one new
  fd, it will use BSD ring 0 and then next file_priv will use BSD ring 1.
  Then BSD ring 0, then BSD ring 1, and so on alternately.
  
  Does this make sense to you?
 
 Well the point of the hash is that it's dumb and simple, but maybe too
 dumb. If we end up with 3 streams on one vcs and 1 on the other, then we
 have a good reason to merge the 2nd patch ;-)
 

Hi, Daniel

Thanks for your comments. Now we get the same point about the
support of dual BSD rings on BDW GT3 machine.  So this will be divided
into two steps. The first step is to use the simple ping-pong mechanism
to add the basic support. And the second step is for the
optimization(balance video workloads among the two rings).
From my point the ping-pong mechanism is simpler and easier to
implement. Of course this can also be regarded as the specific hash.

 Really, the point of the first patch is just so that we have /something/
 which uses both rings with a reasonable chance, so that we can get testing
 and validation off the ground. E.g. in the test I'd use 10 or so drm fds to
 make sure that at least one of them uses the other ring, in case the hash
 function isn't great.

   Understand. We need such test case to verify it. This is already in
my plan.

Thanks.
Yakui





 -Daniel


___
Intel-gfx mailing list
Intel

Re: [Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3

2014-04-10 Thread Zhao Yakui
On Thu, 2014-04-10 at 03:04 -0600, Daniel Vetter wrote:
 On Thu, Apr 10, 2014 at 04:28:34PM +0800, Zhao Yakui wrote:
  BTW: Does it need to check all the flags defined in i915_drm.h or the
  exported flag returned by i915_get_parameter?
 
 I don't have i915_get_parameter anywhere in my sources, so no idea what
 you mean ...

Sorry that the function should be i915_getparam. It is called by the
I915_GETPARAM ioctl to query the flag supported by the driver.

Thanks.
Yakui

 -Daniel


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/5] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3

2014-04-09 Thread Zhao Yakui
On Wed, 2014-04-09 at 08:27 -0600, Daniel Vetter wrote:
 On Wed, Apr 09, 2014 at 09:59:52AM +0800, Zhao Yakui wrote:
  Based on the hardware spec, the BDW GT3 has the different configuration
  with the BDW GT1/GT2. So split the BDW device info definition.
  This is to do the preparation for adding the Dual BSD rings on BDW GT3 
  machine.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
  ---
   drivers/gpu/drm/i915/i915_drv.c |   24 +++-
   include/drm/i915_pciids.h   |   10 +++---
   2 files changed, 30 insertions(+), 4 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_drv.c 
  b/drivers/gpu/drm/i915/i915_drv.c
  index a01faea..609f837 100644
  --- a/drivers/gpu/drm/i915/i915_drv.c
  +++ b/drivers/gpu/drm/i915/i915_drv.c
  @@ -279,6 +279,26 @@ static const struct intel_device_info 
  intel_broadwell_m_info = {
  GEN_DEFAULT_PIPEOFFSETS,
   };
   
  +static const struct intel_device_info intel_broadwell_gt3d_info = {
  +   .gen = 8, .num_pipes = 3,
  +   .need_gfx_hws = 1, .has_hotplug = 1,
  +   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
  +   .has_llc = 1,
  +   .has_ddi = 1,
  +   .has_fbc = 1,
  +   GEN_DEFAULT_PIPEOFFSETS,
  +};
  +
  +static const struct intel_device_info intel_broadwell_gt3m_info = {
  +   .gen = 8, .is_mobile = 1, .num_pipes = 3,
  +   .need_gfx_hws = 1, .has_hotplug = 1,
  +   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
  +   .has_llc = 1,
  +   .has_ddi = 1,
  +   .has_fbc = 1,
  +   GEN_DEFAULT_PIPEOFFSETS,
  +};
  +
   /*
* Make sure any device matches here are from most specific to most
* general.  For example, since the Quanta match is based on the subsystem
  @@ -312,7 +332,9 @@ static const struct intel_device_info 
  intel_broadwell_m_info = {
  INTEL_VLV_M_IDS(intel_valleyview_m_info),  \
  INTEL_VLV_D_IDS(intel_valleyview_d_info),  \
  INTEL_BDW_M_IDS(intel_broadwell_m_info),   \
  -   INTEL_BDW_D_IDS(intel_broadwell_d_info)
  +   INTEL_BDW_D_IDS(intel_broadwell_d_info),   \
  +   INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \
  +   INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info)
 
 You've forgotten to update the stolen memory quirk table in the x86 code.
 Just grep for INTEL_BDW_M_IDS to see all users of these macros.

Thanks for your info. 
I will update it in next version.

Thanks.
Yakui

 -Daniel
 
   
   static const struct pci_device_id pciidlist[] = {  /* aka */
  INTEL_PCI_IDS,
  diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
  index 940ece4..32d75f8 100644
  --- a/include/drm/i915_pciids.h
  +++ b/include/drm/i915_pciids.h
  @@ -225,12 +225,16 @@
   
   #define INTEL_BDW_M_IDS(info) \
  _INTEL_BDW_M_IDS(1, info), \
  -   _INTEL_BDW_M_IDS(2, info), \
  -   _INTEL_BDW_M_IDS(3, info)
  +   _INTEL_BDW_M_IDS(2, info)
   
   #define INTEL_BDW_D_IDS(info) \
  _INTEL_BDW_D_IDS(1, info), \
  -   _INTEL_BDW_D_IDS(2, info), \
  +   _INTEL_BDW_D_IDS(2, info)
  +
  +#define INTEL_BDW_GT3M_IDS(info) \
  +   _INTEL_BDW_M_IDS(3, info)
  +
  +#define INTEL_BDW_GT3D_IDS(info) \
  _INTEL_BDW_D_IDS(3, info)
   
   #endif /* _I915_PCIIDS_H */
  -- 
  1.7.10.1
  
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 4/5] drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning

2014-04-09 Thread Zhao Yakui
On Wed, 2014-04-09 at 08:29 -0600, Daniel Vetter wrote:
 On Wed, Apr 09, 2014 at 09:59:55AM +0800, Zhao Yakui wrote:
  The Gen7 doesn't have the second BSD ring. But it will complain the switch 
  check
  warning message during compilation. So just add it to remove the
  switch check warning.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
  ---
   drivers/gpu/drm/i915/intel_ringbuffer.c |1 +
   1 file changed, 1 insertion(+)
  
  diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
  b/drivers/gpu/drm/i915/intel_ringbuffer.c
  index 11d0687..43e0227 100644
  --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
  +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
  @@ -984,6 +984,7 @@ void intel_ring_setup_status_page(struct 
  intel_ring_buffer *ring)
  case BCS:
  mmio = BLT_HWS_PGA_GEN7;
  break;
  +   case VCS2:
 
 Maybe add a /* doesn't actually exist but shuts up gcc */ comment?

Makes sense.

I will update it.

Thanks.
Yakui
 -Daniel
 
  case VCS:
  mmio = BSD_HWS_PGA_GEN7;
  break;
  -- 
  1.7.10.1
  
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 5/5] drm/i915:Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-09 Thread Zhao Yakui
On Wed, 2014-04-09 at 08:34 -0600, Daniel Vetter wrote:
 On Wed, Apr 09, 2014 at 09:59:56AM +0800, Zhao Yakui wrote:
  The BDW GT3 has two independent BSD rings, which can be used to process the
  video commands. To be simpler, it is transparent to user-space 
  driver/middleware.
  Instead the kernel driver will decide which ring is to dispatch the BSD 
  video
  command.
  
  As every BSD ring is powerful, it is enough to dispatch the BSD video 
  command
  based on the drm fd. In such case the different BSD ring is used for video 
  playing
  back and encoding. At the same time the coarse dispatch mechanism can help 
  to avoid
  the object synchronization between the BSD rings.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
 
 This looks way too complicated. First things first please get rid of the
 atomic_t usage. If you don't have _massive_ comments explaining the memory
 barriers you're most likely using linux kernel atomic_t wrong. They are
 fully unordered.

Thanks for the review.

For the atomic_t usage:  I will remove it in next version as the counter
is already protected by the lock.  

 
 With that out of the way this still looks a bit complicated really. Can't
 we just use a very simple static rule in gen8_dispatch_bsd_ring which
 hashed the pointer address of the file_priv? Just to get things going,
 once we have a clear need we can try to make things more intelligent. But
 in case of doubt I really prefer if we start with the dumbest possible
 approach first and add complexity instead of starting with something
 really complex and simplifying it.

Do you mean that file_priv is hashed and then is mapped to BSD 0 or 1
ring?  
The GT3 machine has two independent BSD rings. It will be better that
the kernel driver can balance the video workload between the two rings. 
When using the hashed file_priv to select BSD ring, the video balance
depends on the design of the hash. Under some scenarios, it will be
possible that one ring is very busy while another ring is very idle. And
then performance of video playing back/encoding will be affected.
At the same time the hash mechanism is only used to select the
corresponding BSD ring when one drm_fd is opened. And it doesn't
consider the video workload balance after finishing some workloads.

The following is the basic idea in my patch.(A counter variable is added
for ring. The bigger the counter, the higher the workload).
   a. When one new fd needs to dispatch the BSD video command, it will
select the ring with the lowest workload(lowest counter). And then
counter in this ring will be added.
   b. when the drm fd is closed(the workload is finished), the counter
of the ring used by file_priv will be decreased. 
   c. When the drm fd already selects one BSD ring in previously
submitted command, it will check whether it is using the ring with the
lowest workload(lowest counter). If not, it can be switched. The purpose
is to assure that the workload is still balanced between the two BSD
rings. For example: User wants to play back four video clips. BSD 0 ring
is selected to play back the two long clips. BSD 1 ring is selected to
play back the two short clips. After it finishes the playing back of two
short clips, the BSD 1 ring can be switched to play back the long clip.
Still balance.

What do you think?

 -Daniel
 
  ---
   drivers/gpu/drm/i915/i915_dma.c|   14 ++
   drivers/gpu/drm/i915/i915_drv.h|3 ++
   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   73 
  +++-
   drivers/gpu/drm/i915/intel_ringbuffer.c|2 +
   drivers/gpu/drm/i915/intel_ringbuffer.h|2 +
   5 files changed, 93 insertions(+), 1 deletion(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_dma.c 
  b/drivers/gpu/drm/i915/i915_dma.c
  index 0b38f88..8260463 100644
  --- a/drivers/gpu/drm/i915/i915_dma.c
  +++ b/drivers/gpu/drm/i915/i915_dma.c
  @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
  long flags)
  spin_lock_init(dev_priv-backlight_lock);
  spin_lock_init(dev_priv-uncore.lock);
  spin_lock_init(dev_priv-mm.object_stat_lock);
  +   spin_lock_init(dev_priv-bsd_lock);
  mutex_init(dev_priv-dpio_lock);
  mutex_init(dev_priv-modeset_restore_lock);
   
  @@ -1928,7 +1929,20 @@ void i915_driver_preclose(struct drm_device * dev, 
  struct drm_file *file_priv)
   void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
   {
  struct drm_i915_file_private *file_priv = file-driver_priv;
  +   struct intel_ring_buffer *bsd_ring;
  +   struct drm_i915_private *dev_priv = dev-dev_private;
   
  +   if (file_priv  file_priv-bsd_ring) {
  +   int cmd_counter;
  +   bsd_ring = file_priv-bsd_ring;
  +   file_priv-bsd_ring = NULL;
  +   spin_lock(dev_priv-bsd_lock);
  +   cmd_counter = atomic_sub_return(1, bsd_ring-bsd_cmd_counter);
  +   if (cmd_counter  0) {
  +   atomic_set(bsd_ring

Re: [Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3

2014-04-09 Thread Zhao Yakui
On Wed, 2014-04-09 at 08:45 -0600, Daniel Vetter wrote:
 On Wed, Apr 09, 2014 at 09:59:51AM +0800, Zhao Yakui wrote:
  
  This is the patch set that tries to add the support of dual BSD rings on BDW
  GT3. Based on hardware spec, the BDW GT3 has two independent BSD rings, 
  which
  can be used to process the video commands. To be simpler, it is transparent 
  to user-space driver/middleware. In such case the kernel driver will decide
  which ring is to dispatch the BSD video command.
  
  As every BSD ring is powerful, it is enough to dispatch the BSD video 
  command
  based on the drm fd. In such case the different BSD ring is used for video 
  playing
  back and encoding. At the same time the coarse dispatch mechanism can help 
  to avoid
  the object synchronization between the BSD rings.
 
 Ok, I've quickly read through it all and commented on a few things. Imo
 the last patch should be massively simplified, at least for the first
 round. Other things look small.
 
Hi, Daniel

Thanks for your review.

 What's still missing are testcases, and I have two things in mind here:
 - Exercise the 2nd ring dispatch and sync a bit. Since the 2nd bsd ring is
   hidden within the kernel I think the right approach would be to open a
   few drm fds (10 or so) and then randomly use them with a dummy reloc. We
   have two testcases which can be used as blueprints that need
   adjustment:
 
   - gem_ring_sync_loop: Probably easiest to copy it to a new file as
 gem_multi_bsd_sync_loop. This test exercises semaphores.
   - gem_dummy_reloc_loop, subtest mixed: Almost the same as the above, but
 the sync is done _inside_ the loop and hence this exercises gpu/cpu
 sync. We need both tests adjusted, for this we need a new
 multi-bsd test.

Agree with your concerns. I will try to add the
gem_multi_bsd_sync_loop/dummy_reloc_loop test case so that it can test
the sync with multi-BSD.

BTW: How about if I directly add multiple fds in gem_ring_sync_loop test
case and then test the sync among the different rings?  In such case the
user-application doesn't need to know the existence of multi-BSD rings.

 
 - New testcase to fully test main execbuffer flags. This is simply
   something that we don't yet have. The next guy to touch execbuf code
   needs to add it, and it looks like that's you ;-) I've done a JIRA task
   for the resource streamer work, but I think the resource streamer wont
   be merged anytime soon. So I'll reassign to you. Jira task is VIZ-3129.

For the new testcase of execbuffer flag:  Do you have any idea about
which kind of exec flag needs to be checked? Do you have any idea about
the expected failure/success behaviour for the flags?
   For example: I915_EXEC_PINNED : If one object is not pinned and
submitted, what behaviour is expected? Fail or wrong?

Thanks.
Yakui
 
 Thanks, Daniel
 
  
  
  Zhao Yakui (5):
drm/i915: Split the BDW device definition to prepare for dual BSD
  rings on BDW GT3
drm/i915: Initialize the second BSD ring on BDW GT3 machine
drm/i915: Handle the irq interrupt for the second BSD ring
drm/i915: Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to
  remove the switch check warning
drm/i915: Use the coarse mechanism based on drm fd to dispatch the BSD 
  command
  on BDW GT3
  
   drivers/gpu/drm/i915/i915_dma.c|   14 ++
   drivers/gpu/drm/i915/i915_drv.c|   24 -
   drivers/gpu/drm/i915/i915_drv.h|5 ++
   drivers/gpu/drm/i915/i915_gem.c|9 +++-
   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   73 
  +++-
   drivers/gpu/drm/i915/i915_gpu_error.c  |1 +
   drivers/gpu/drm/i915/i915_irq.c|5 +-
   drivers/gpu/drm/i915/i915_reg.h|1 +
   drivers/gpu/drm/i915/intel_ringbuffer.c|   57 ++
   drivers/gpu/drm/i915/intel_ringbuffer.h|6 ++-
   include/drm/i915_pciids.h  |   10 ++--
   11 files changed, 197 insertions(+), 8 deletions(-)
  
  -- 
  1.7.10.1
  
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 5/5] drm/i915:Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-08 Thread Zhao Yakui
The BDW GT3 has two independent BSD rings, which can be used to process the
video commands. To be simpler, it is transparent to user-space 
driver/middleware.
Instead the kernel driver will decide which ring is to dispatch the BSD video
command.

As every BSD ring is powerful, it is enough to dispatch the BSD video command
based on the drm fd. In such case the different BSD ring is used for video 
playing
back and encoding. At the same time the coarse dispatch mechanism can help to 
avoid
the object synchronization between the BSD rings.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c|   14 ++
 drivers/gpu/drm/i915/i915_drv.h|3 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   73 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.c|2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h|2 +
 5 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0b38f88..8260463 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
long flags)
spin_lock_init(dev_priv-backlight_lock);
spin_lock_init(dev_priv-uncore.lock);
spin_lock_init(dev_priv-mm.object_stat_lock);
+   spin_lock_init(dev_priv-bsd_lock);
mutex_init(dev_priv-dpio_lock);
mutex_init(dev_priv-modeset_restore_lock);
 
@@ -1928,7 +1929,20 @@ void i915_driver_preclose(struct drm_device * dev, 
struct drm_file *file_priv)
 void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
 {
struct drm_i915_file_private *file_priv = file-driver_priv;
+   struct intel_ring_buffer *bsd_ring;
+   struct drm_i915_private *dev_priv = dev-dev_private;
 
+   if (file_priv  file_priv-bsd_ring) {
+   int cmd_counter;
+   bsd_ring = file_priv-bsd_ring;
+   file_priv-bsd_ring = NULL;
+   spin_lock(dev_priv-bsd_lock);
+   cmd_counter = atomic_sub_return(1, bsd_ring-bsd_cmd_counter);
+   if (cmd_counter  0) {
+   atomic_set(bsd_ring-bsd_cmd_counter, 0);
+   }
+   spin_unlock(dev_priv-bsd_lock);
+   }
kfree(file_priv);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d77f4e0..128639c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1457,6 +1457,8 @@ struct drm_i915_private {
struct i915_dri1_state dri1;
/* Old ums support infrastructure, same warning applies. */
struct i915_ums_state ums;
+   /* the lock for dispatch video commands on two BSD rings */
+   spinlock_t bsd_lock;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -1664,6 +1666,7 @@ struct drm_i915_file_private {
 
struct i915_hw_context *private_default_ctx;
atomic_t rps_wait_boost;
+   struct  intel_ring_buffer *bsd_ring;
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3491402..75d8cc0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -999,6 +999,70 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
 }
 
+/**
+ * Find one BSD ring to dispatch the corresponding BSD command.
+ * The Ring ID is returned.
+ */
+static int gen8_dispatch_bsd_ring(struct drm_device *dev,
+ struct drm_file *file)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_i915_file_private *file_priv = file-driver_priv;
+   struct intel_ring_buffer *temp_ring, *bsd_ring;
+   int bsd_counter, temp_counter;
+
+   if (file_priv-bsd_ring) {
+   /* Check whether the load balance is required.*/
+   spin_lock(dev_priv-bsd_lock);
+   bsd_counter = 
atomic_read((file_priv-bsd_ring-bsd_cmd_counter));
+   temp_ring = dev_priv-ring[VCS];
+   temp_counter = atomic_read(temp_ring-bsd_cmd_counter);
+   bsd_ring = dev_priv-ring[VCS];
+
+   temp_ring = dev_priv-ring[VCS2];
+   if (atomic_read(temp_ring-bsd_cmd_counter)  temp_counter) {
+   temp_counter = atomic_read(temp_ring-bsd_cmd_counter);
+   bsd_ring = temp_ring;
+   }
+   /*
+* If it is already the ring with the minimum load, it is
+* unnecessary to switch it.
+*/
+   if (bsd_ring == file_priv-bsd_ring) {
+   spin_unlock(dev_priv-bsd_lock);
+   return bsd_ring-id;
+   }
+   /*
+* If the load delta between current ring and target ring

[Intel-gfx] [PATCH 3/5] drm/i915:Handle the irq interrupt for the second BSD ring

2014-04-08 Thread Zhao Yakui
Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_irq.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index bdda3b5..d5b1dd3 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1347,13 +1347,16 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
DRM_ERROR(The master control interrupt lied (GT0)!\n);
}
 
-   if (master_ctl  GEN8_GT_VCS1_IRQ) {
+   if (master_ctl  (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
tmp = I915_READ(GEN8_GT_IIR(1));
if (tmp) {
ret = IRQ_HANDLED;
vcs = tmp  GEN8_VCS1_IRQ_SHIFT;
if (vcs  GT_RENDER_USER_INTERRUPT)
notify_ring(dev, dev_priv-ring[VCS]);
+   vcs = tmp  GEN8_VCS2_IRQ_SHIFT;
+   if (vcs  GT_RENDER_USER_INTERRUPT)
+   notify_ring(dev, dev_priv-ring[VCS2]);
I915_WRITE(GEN8_GT_IIR(1), tmp);
} else
DRM_ERROR(The master control interrupt lied (GT1)!\n);
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3

2014-04-08 Thread Zhao Yakui

This patch set adds support for dual BSD rings on BDW GT3. Based on the
hardware spec, the BDW GT3 has two independent BSD rings, which can be used
to process the video commands. To keep things simple, this is transparent to
the user-space driver/middleware. In such a case, the kernel driver decides
which ring the BSD video command is dispatched to.

As every BSD ring is powerful, it is enough to dispatch the BSD video command
based on the drm fd. In such a case, different BSD rings are used for video
playback and encoding. At the same time, the coarse dispatch mechanism helps to
avoid object synchronization between the BSD rings.


Zhao Yakui (5):
  drm/i915: Split the BDW device definition to prepare for dual BSD
rings on BDW GT3
  drm/i915: Initialize the second BSD ring on BDW GT3 machine
  drm/i915: Handle the irq interrupt for the second BSD ring
  drm/i915: Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to
remove the switch check warning
  drm/i915: Use the coarse mechanism based on drm fd to dispatch the BSD command
on BDW GT3

 drivers/gpu/drm/i915/i915_dma.c|   14 ++
 drivers/gpu/drm/i915/i915_drv.c|   24 -
 drivers/gpu/drm/i915/i915_drv.h|5 ++
 drivers/gpu/drm/i915/i915_gem.c|9 +++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   73 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c  |1 +
 drivers/gpu/drm/i915/i915_irq.c|5 +-
 drivers/gpu/drm/i915/i915_reg.h|1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c|   57 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h|6 ++-
 include/drm/i915_pciids.h  |   10 ++--
 11 files changed, 197 insertions(+), 8 deletions(-)

-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/5] drm/i915:Initialize the second BSD ring on BDW GT3 machine

2014-04-08 Thread Zhao Yakui
Based on the hardware spec, the BDW GT3 machine has two independent
BSD rings that can be used to dispatch the video commands.
So initialize the second BSD ring as well.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |4 +--
 drivers/gpu/drm/i915/i915_drv.h |2 ++
 drivers/gpu/drm/i915/i915_gem.c |9 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |1 +
 drivers/gpu/drm/i915/i915_reg.h |1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   54 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++-
 7 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 609f837..10941c5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -282,7 +282,7 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
 static const struct intel_device_info intel_broadwell_gt3d_info = {
.gen = 8, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
@@ -292,7 +292,7 @@ static const struct intel_device_info 
intel_broadwell_gt3d_info = {
 static const struct intel_device_info intel_broadwell_gt3m_info = {
.gen = 8, .is_mobile = 1, .num_pipes = 3,
.need_gfx_hws = 1, .has_hotplug = 1,
-   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
.has_llc = 1,
.has_ddi = 1,
.has_fbc = 1,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 55addaa..d77f4e0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1817,7 +1817,9 @@ struct drm_i915_cmd_table {
 #define BSD_RING   (1VCS)
 #define BLT_RING   (1BCS)
 #define VEBOX_RING (1VECS)
+#define BSD2_RING  (1VCS2)
 #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask  BSD_RING)
+#define HAS_BSD2(dev)  (INTEL_INFO(dev)-ring_mask  BSD2_RING)
 #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask  BLT_RING)
 #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask  VEBOX_RING)
 #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c70121d..1756276 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4372,13 +4372,20 @@ static int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_blt_ring;
}
 
+   if (HAS_BSD2(dev)) {
+   ret = intel_init_bsd2_ring_buffer(dev);
+   if (ret)
+   goto cleanup_vebox_ring;
+   }
 
ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
if (ret)
-   goto cleanup_vebox_ring;
+   goto cleanup_ring;
 
return 0;
 
+cleanup_ring:
+   intel_cleanup_ring_buffer(dev_priv-ring[VCS2]);
 cleanup_vebox_ring:
intel_cleanup_ring_buffer(dev_priv-ring[VECS]);
 cleanup_blt_ring:
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 1005af0..f6d21b3 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -42,6 +42,7 @@ static const char *ring_str(int ring)
case VCS: return bsd;
case BCS: return blt;
case VECS: return vebox;
+   case VCS2: return second bsd;
default: return ;
}
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8e60737..8f5c103 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -750,6 +750,7 @@ enum punit_power_well {
 #define RENDER_RING_BASE   0x02000
 #define BSD_RING_BASE  0x04000
 #define GEN6_BSD_RING_BASE 0x12000
+#define GEN8_BSD2_RING_BASE0x1c000
 #define VEBOX_RING_BASE0x1a000
 #define BLT_RING_BASE  0x22000
 #define RING_TAIL(base)((base)+0x30)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 3d76ce1..11d0687 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1920,10 +1920,12 @@ int intel_init_render_ring_buffer(struct drm_device 
*dev)
ring-semaphore_register[VCS] = MI_SEMAPHORE_SYNC_RV;
ring-semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB;
ring-semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE;
+   ring-semaphore_register[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
ring-signal_mbox[RCS] = GEN6_NOSYNC;
ring-signal_mbox[VCS] = GEN6_VRSYNC

[Intel-gfx] [PATCH 4/5] drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning

2014-04-08 Thread Zhao Yakui
Gen7 doesn't have the second BSD ring, but the compiler still emits a switch
check warning during compilation. So just add the VCS2 case to remove the
switch check warning.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 11d0687..43e0227 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -984,6 +984,7 @@ void intel_ring_setup_status_page(struct intel_ring_buffer 
*ring)
case BCS:
mmio = BLT_HWS_PGA_GEN7;
break;
+   case VCS2:
case VCS:
mmio = BSD_HWS_PGA_GEN7;
break;
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/5] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3

2014-04-08 Thread Zhao Yakui
Based on the hardware spec, the BDW GT3 has a different configuration
from the BDW GT1/GT2. So split the BDW device info definition.
This prepares for adding the dual BSD rings on the BDW GT3 machine.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |   24 +++-
 include/drm/i915_pciids.h   |   10 +++---
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a01faea..609f837 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -279,6 +279,26 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
GEN_DEFAULT_PIPEOFFSETS,
 };
 
+static const struct intel_device_info intel_broadwell_gt3d_info = {
+   .gen = 8, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
+static const struct intel_device_info intel_broadwell_gt3m_info = {
+   .gen = 8, .is_mobile = 1, .num_pipes = 3,
+   .need_gfx_hws = 1, .has_hotplug = 1,
+   .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+   .has_llc = 1,
+   .has_ddi = 1,
+   .has_fbc = 1,
+   GEN_DEFAULT_PIPEOFFSETS,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
@@ -312,7 +332,9 @@ static const struct intel_device_info 
intel_broadwell_m_info = {
INTEL_VLV_M_IDS(intel_valleyview_m_info),  \
INTEL_VLV_D_IDS(intel_valleyview_d_info),  \
INTEL_BDW_M_IDS(intel_broadwell_m_info),   \
-   INTEL_BDW_D_IDS(intel_broadwell_d_info)
+   INTEL_BDW_D_IDS(intel_broadwell_d_info),   \
+   INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \
+   INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info)
 
 static const struct pci_device_id pciidlist[] = {  /* aka */
INTEL_PCI_IDS,
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 940ece4..32d75f8 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -225,12 +225,16 @@
 
 #define INTEL_BDW_M_IDS(info) \
_INTEL_BDW_M_IDS(1, info), \
-   _INTEL_BDW_M_IDS(2, info), \
-   _INTEL_BDW_M_IDS(3, info)
+   _INTEL_BDW_M_IDS(2, info)
 
 #define INTEL_BDW_D_IDS(info) \
_INTEL_BDW_D_IDS(1, info), \
-   _INTEL_BDW_D_IDS(2, info), \
+   _INTEL_BDW_D_IDS(2, info)
+
+#define INTEL_BDW_GT3M_IDS(info) \
+   _INTEL_BDW_M_IDS(3, info)
+
+#define INTEL_BDW_GT3D_IDS(info) \
_INTEL_BDW_D_IDS(3, info)
 
 #endif /* _I915_PCIIDS_H */
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx