Re: [Intel-gfx] [PATCH v2] drm/i915: Remove memory frequency calculation
On 2021/10/13 10:54, Matt Roper wrote: On Tue, Oct 12, 2021 at 06:00:46PM -0700, José Roberto de Souza wrote: This memory frequency calculated is only used to check if it is zero, what is not useful as it will never actually be zero. Also the calculation is wrong, we should be checking other bit to select the appropriate frequency multiplier while this code is stuck with a fixed multiplier. So here dropping it as whole. v2: - Also remove memory frequency calculation for gen9 LP platforms Cc: Yakui Zhao Cc: Matt Roper Fixes: f8112cb9574b ("drm/i915/gen11+: Only load DRAM information from pcode") Signed-off-by: José Roberto de Souza Reviewed-by: Matt Roper After removing the check of memory frequency, the EHL SBL can work as expected. Otherwise it will fail some checks in intel_dram_detect because of incorrect memory frequency calculation. Add: Tested-by: Zhao Yakui --- drivers/gpu/drm/i915/i915_reg.h | 8 drivers/gpu/drm/i915/intel_dram.c | 30 ++ 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a897f4abea0c3..8825f7ac477b6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -11109,12 +11109,6 @@ enum skl_power_gate { #define DC_STATE_DEBUG_MASK_CORES(1 << 0) #define DC_STATE_DEBUG_MASK_MEMORY_UP(1 << 1) -#define BXT_P_CR_MC_BIOS_REQ_0_0_0 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7114) -#define BXT_REQ_DATA_MASK 0x3F -#define BXT_DRAM_CHANNEL_ACTIVE_SHIFT 12 -#define BXT_DRAM_CHANNEL_ACTIVE_MASK (0xF << 12) -#define BXT_MEMORY_FREQ_MULTIPLIER_HZ 1 - #define BXT_D_CR_DRP0_DUNIT8 0x1000 #define BXT_D_CR_DRP0_DUNIT9 0x1200 #define BXT_D_CR_DRP0_DUNIT_START8 @@ -11145,9 +11139,7 @@ enum skl_power_gate { #define BXT_DRAM_TYPE_LPDDR4 (0x2 << 22) #define BXT_DRAM_TYPE_DDR4 (0x4 << 22) -#define SKL_MEMORY_FREQ_MULTIPLIER_HZ 2 #define SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5E04) -#define SKL_REQ_DATA_MASK (0xF << 0) #define 
DG1_GEAR_TYPEREG_BIT(16) #define SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5000) diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 30a0cab5eff46..0adadfd9528aa 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -244,7 +244,6 @@ static int skl_get_dram_info(struct drm_i915_private *i915) { struct dram_info *dram_info = >dram_info; - u32 mem_freq_khz, val; int ret; dram_info->type = skl_get_dram_type(i915); @@ -255,17 +254,6 @@ skl_get_dram_info(struct drm_i915_private *i915) if (ret) return ret; - val = intel_uncore_read(>uncore, - SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU); - mem_freq_khz = DIV_ROUND_UP((val & SKL_REQ_DATA_MASK) * - SKL_MEMORY_FREQ_MULTIPLIER_HZ, 1000); - - if (dram_info->num_channels * mem_freq_khz == 0) { - drm_info(>drm, -"Couldn't get system memory bandwidth\n"); - return -EINVAL; - } - return 0; } @@ -350,24 +338,10 @@ static void bxt_get_dimm_info(struct dram_dimm_info *dimm, u32 val) static int bxt_get_dram_info(struct drm_i915_private *i915) { struct dram_info *dram_info = >dram_info; - u32 dram_channels; - u32 mem_freq_khz, val; - u8 num_active_channels, valid_ranks = 0; + u32 val; + u8 valid_ranks = 0; int i; - val = intel_uncore_read(>uncore, BXT_P_CR_MC_BIOS_REQ_0_0_0); - mem_freq_khz = DIV_ROUND_UP((val & BXT_REQ_DATA_MASK) * - BXT_MEMORY_FREQ_MULTIPLIER_HZ, 1000); - - dram_channels = val & BXT_DRAM_CHANNEL_ACTIVE_MASK; - num_active_channels = hweight32(dram_channels); - - if (mem_freq_khz * num_active_channels == 0) { - drm_info(>drm, -"Couldn't get system memory bandwidth\n"); - return -EINVAL; - } - /* * Now read each DUNIT8/9/10/11 to check the rank of each dimms. */ -- 2.33.0
Re: [Intel-gfx] [v1 10/10] drm/i915/gvt: GVTg support ppgtt pvmmio optimization
On 2018年10月11日 14:14, Xiaolin Zhang wrote: This patch handles ppgtt update from g2v notification. It read out ppgtt pte entries from guest pte tables page and convert them to host pfns. It creates local ppgtt tables and insert the content pages into the local ppgtt tables directly, which does not track the usage of guest page table and removes the cost of write protection from the original shadow page mechansim. It is possible that Guest VGPU writes the ppgtt entry by using 2M/64K page mode. If so, the gvtg should also handle it in PVMMIO mode. v1: rebase v0: RFC Signed-off-by: Xiaolin Zhang --- drivers/gpu/drm/i915/gvt/gtt.c | 318 drivers/gpu/drm/i915/gvt/gtt.h | 9 + drivers/gpu/drm/i915/gvt/handlers.c | 13 +- 3 files changed, 338 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 58e166e..8d3e21a 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1744,6 +1744,26 @@ static int ppgtt_handle_guest_write_page_table_bytes( return 0; } +static void invalidate_mm_pv(struct intel_vgpu_mm *mm) +{ + struct intel_vgpu *vgpu = mm->vgpu; + struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt_gtt *gtt = >gtt; + struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; + struct intel_gvt_gtt_entry se; + + i915_ppgtt_close(>ppgtt->vm); + i915_ppgtt_put(mm->ppgtt); + + ppgtt_get_shadow_root_entry(mm, , 0); + if (!ops->test_present()) + return; + se.val64 = 0; + ppgtt_set_shadow_root_entry(mm, , 0); + + mm->ppgtt_mm.shadowed = false; +} + static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) { struct intel_vgpu *vgpu = mm->vgpu; @@ -1756,6 +1776,11 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) if (!mm->ppgtt_mm.shadowed) return; + if (VGPU_PVMMIO(mm->vgpu) & PVMMIO_PPGTT_UPDATE) { + invalidate_mm_pv(mm); + return; + } + for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) { ppgtt_get_shadow_root_entry(mm, , index); @@ -1773,6 +1798,26 @@ static void 
invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) mm->ppgtt_mm.shadowed = false; } +static int shadow_mm_pv(struct intel_vgpu_mm *mm) +{ + struct intel_vgpu *vgpu = mm->vgpu; + struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt_gtt_entry se; + + mm->ppgtt = i915_ppgtt_create(gvt->dev_priv, NULL); + if (IS_ERR(mm->ppgtt)) { + gvt_vgpu_err("fail to create ppgtt for pdp 0x%llx\n", + px_dma(>ppgtt->pml4)); + return PTR_ERR(mm->ppgtt); + } + + se.type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; + se.val64 = px_dma(>ppgtt->pml4); + ppgtt_set_shadow_root_entry(mm, , 0); + mm->ppgtt_mm.shadowed = true; + + return 0; +} static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) { @@ -1787,6 +1832,9 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) if (mm->ppgtt_mm.shadowed) return 0; + if (VGPU_PVMMIO(mm->vgpu) & PVMMIO_PPGTT_UPDATE) + return shadow_mm_pv(mm); + mm->ppgtt_mm.shadowed = true; for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) { @@ -2767,3 +2815,273 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) intel_vgpu_destroy_all_ppgtt_mm(vgpu); intel_vgpu_reset_ggtt(vgpu, true); } + +int intel_vgpu_g2v_pv_ppgtt_alloc_4lvl(struct intel_vgpu *vgpu, + u64 pdps[]) +{ + struct intel_vgpu_mm *mm; + int ret = 0; + u32 offset; + struct pv_ppgtt_update pv_ppgtt; + + offset = offsetof(struct gvt_shared_page, pv_ppgtt); + intel_gvt_read_shared_page(vgpu, offset, _ppgtt, sizeof(pv_ppgtt)); + + mm = intel_vgpu_find_ppgtt_mm(vgpu, _ppgtt.pdp); + if (!mm) { + gvt_vgpu_err("failed to find pdp 0x%llx\n", pv_ppgtt.pdp); + ret = -EINVAL; + } else { + ret = mm->ppgtt->vm.allocate_va_range(>ppgtt->vm, + pv_ppgtt.start, pv_ppgtt.length); + if (ret) + gvt_vgpu_err("failed to alloc %llx\n", pv_ppgtt.pdp); + } + + return ret; +} + +int intel_vgpu_g2v_pv_ppgtt_clear_4lvl(struct intel_vgpu *vgpu, + u64 pdps[]) +{ + struct intel_vgpu_mm *mm; + int ret = 0; + u32 offset; + struct pv_ppgtt_update pv_ppgtt; + + offset = offsetof(struct gvt_shared_page, pv_ppgtt); + 
intel_gvt_read_shared_page(vgpu, offset, _ppgtt, sizeof(pv_ppgtt)); + mm = intel_vgpu_find_ppgtt_mm(vgpu, _ppgtt.pdp); + if (!mm) { + gvt_vgpu_err("failed to find pdp 0x%llx\n", pv_ppgtt.pdp); + ret = -EINVAL; + } else { +
Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU
>-Original Message- >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] >Sent: Tuesday, July 3, 2018 10:08 PM >To: Zhao, Yakui ; Daniel Vetter >Cc: intel-gfx@lists.freedesktop.org >Subject: RE: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on >VGPU > >Quoting Zhao, Yakui (2018-07-03 14:58:31) >> >-Original Message- >> >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] >> >Sent: Tuesday, July 3, 2018 9:25 PM >> >To: Zhao, Yakui ; Daniel Vetter >> > >> >Cc: intel-gfx@lists.freedesktop.org >> >Subject: RE: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg >> >only once on VGPU >> > >> >Quoting Zhao, Yakui (2018-07-03 13:47:46) >> >> >> >> >-Original Message- >> >> >From: Daniel Vetter [mailto:daniel.vet...@ffwll.ch] On Behalf Of >> >> >Daniel Vetter >> >> >Sent: Tuesday, July 3, 2018 5:52 PM >> >> >To: Chris Wilson >> >> >Cc: Daniel Vetter ; Zhao, Yakui >> >> >; intel-gfx@lists.freedesktop.org >> >> >Subject: Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg >> >> >only once on VGPU >> >> > >> >> >On Tue, Jul 03, 2018 at 10:05:28AM +0100, Chris Wilson wrote: >> >> >> Quoting Daniel Vetter (2018-07-03 09:51:03) >> >> >> > On Tue, Jul 03, 2018 at 10:56:17AM +0800, Zhao Yakui wrote: >> >> >> > > On VGPU scenario the read/write operation of fence_reg will >> >> >> > > be trapped by the GVT-g. And then gvt-g follows the HW spec >> >> >> > > to write the >> >> >fence_reg. >> >> >> > > So it is unnecessary to read/write fence reg several times. >> >> >> > > This will help to reduce the unnecessary trap of fence_reg >> >> >> > > mmio >> >operation. >> >> >> > > >> >> >> > > V1->V2: Fix one typo error of parameter when calling >> >> >> > > V1->intel_vgpu_active >> >> >> > > >> >> >> > > Signed-off-by: Zhao Yakui >> >> >> > >> >> >> > Ok this makes more sense. 
Except you need to put the 64bit >> >> >> > entirely into the vpgu block, with a comment explaining why >> >> >> > this is safe (since the vpgu will take care of updating fences >> >> >> > correctly). >> >> >> >> >> >> Except, who cares? Are fence registers being rewritten that >> >> >> frequently that special casing vgpu is worth the hassle. Part of >> >> >> that is that you need to leave a hint behind in the code that >> >> >> (a) explains why it is safe after having the "here be dragons" >> >> >> and (b) why we >> >care. >> >> >> >> >> >> On a more pragmatic level if fencing doesn't plateau out to >> >> >> steady state, that is a worrying amount of contention -- the >> >> >> actual fence write itself would be the least of my worries. >> >> > >> >> >I can easily imagine that with the few per-client fences vgpu >> >> >hands out rewrites are much more common. But yeah some real data >> >> >would be >> >good. >> >> >And more reasons to get mesa off of the gtt mmaps. >> >> >> >> Hi, Daniel/Chris >> >> >> >> Thanks for your comments. >> >> The fence reg is used to assure the access of Tiled surface >> >> through aperature window. When fence is needed, the driver helps to >> >> find one available fence reg and then configure it. After it is not >> >> used, the >> >fence will be turned off and then be allocated for next usage. It >> >doesn't rely on the state of fence reg. In such case we don't need >> >to worry about the unsteady state. >> >> >> >> For the VGPU operation: The op of fence reg is trapped. Then >> >> the gvt-g >> >will follow the trapped value to program the fence_reg. >> >> (It will turn off and then write the expected value for any trapped >> >> write op >> >of fence reg). The trapped op in GVT-g is safe. >> >> >> >> Based on the current logic, it needs the five traps when one >> >> fence reg is >> >configured under VGPU mode.(
Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU
>-Original Message- >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] >Sent: Tuesday, July 3, 2018 9:25 PM >To: Zhao, Yakui ; Daniel Vetter >Cc: intel-gfx@lists.freedesktop.org >Subject: RE: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on >VGPU > >Quoting Zhao, Yakui (2018-07-03 13:47:46) >> >> >-Original Message- >> >From: Daniel Vetter [mailto:daniel.vet...@ffwll.ch] On Behalf Of >> >Daniel Vetter >> >Sent: Tuesday, July 3, 2018 5:52 PM >> >To: Chris Wilson >> >Cc: Daniel Vetter ; Zhao, Yakui >> >; intel-gfx@lists.freedesktop.org >> >Subject: Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg >> >only once on VGPU >> > >> >On Tue, Jul 03, 2018 at 10:05:28AM +0100, Chris Wilson wrote: >> >> Quoting Daniel Vetter (2018-07-03 09:51:03) >> >> > On Tue, Jul 03, 2018 at 10:56:17AM +0800, Zhao Yakui wrote: >> >> > > On VGPU scenario the read/write operation of fence_reg will be >> >> > > trapped by the GVT-g. And then gvt-g follows the HW spec to >> >> > > write the >> >fence_reg. >> >> > > So it is unnecessary to read/write fence reg several times. >> >> > > This will help to reduce the unnecessary trap of fence_reg mmio >operation. >> >> > > >> >> > > V1->V2: Fix one typo error of parameter when calling >> >> > > V1->intel_vgpu_active >> >> > > >> >> > > Signed-off-by: Zhao Yakui >> >> > >> >> > Ok this makes more sense. Except you need to put the 64bit >> >> > entirely into the vpgu block, with a comment explaining why this >> >> > is safe (since the vpgu will take care of updating fences correctly). >> >> >> >> Except, who cares? Are fence registers being rewritten that >> >> frequently that special casing vgpu is worth the hassle. Part of >> >> that is that you need to leave a hint behind in the code that (a) >> >> explains why it is safe after having the "here be dragons" and (b) why we >care. 
>> >> >> >> On a more pragmatic level if fencing doesn't plateau out to steady >> >> state, that is a worrying amount of contention -- the actual fence >> >> write itself would be the least of my worries. >> > >> >I can easily imagine that with the few per-client fences vgpu hands >> >out rewrites are much more common. But yeah some real data would be >good. >> >And more reasons to get mesa off of the gtt mmaps. >> >> Hi, Daniel/Chris >> >> Thanks for your comments. >> The fence reg is used to assure the access of Tiled surface >> through aperature window. When fence is needed, the driver helps to >> find one available fence reg and then configure it. After it is not used, the >fence will be turned off and then be allocated for next usage. It doesn't rely >on >the state of fence reg. In such case we don't need to worry about the >unsteady state. >> >> For the VGPU operation: The op of fence reg is trapped. Then the gvt-g >will follow the trapped value to program the fence_reg. >> (It will turn off and then write the expected value for any trapped write op >of fence reg). The trapped op in GVT-g is safe. >> >> Based on the current logic, it needs the five traps when one fence >> reg is >configured under VGPU mode.(Three writes, two reads). >> If it is programmed in one 64-bit op under VGPU mode, only one trap is >needed. And the GVT-g still can configure the expected fence_value. >> As the trap is quite heavy for VGPU, the trap time can be saved. > >But the argument is can we avoid it entirely by never changing the fence. You >say this is used for mapping through the aperture (GTT), we say userspace >shouldn't be doing that for performance reasons :) A slow trap on top of a >slow operation that is already causing contention seems more sensible to fix >at source. (Albeit so long as the maintenance burden is considered and found >to be reasonable, adding special cases with their rationale is acceptable.) 
So >you have to sell why this mmio is worthy of special attention and curtail any >future questions. If the userspace driver/app can take care of the buffer allocation especially for the tiled surface, maybe it can reduce the ratio of changing the fence. But this can't be avoided if the tiled buffer is needed and allocated. This also depends on the userspace driver. And it is beyond the responsibility of the kernel driver. I
[Intel-gfx] [PATCH v3] drm/i915: Use 64-bit write to optimize writing fence_reg on VGPU
On VGPU scenario the read/write operation of fence_reg will be trapped by the GVT-g. Then gvt-g follows the HW spec to program the fence_reg. And the gvt-g takes care of updating the fence reg correctly for any trapped value of fence reg. So it is unnecessary to read/write fence reg several times. It is enough that the fence reg is written only once in 64-bit mode. This will help to reduce the redundant traps of fence_reg mmio operation. V1->V2: Fix one typo error of parameter when calling intel_vgpu_active. V2->V3: Follow Chris Wilson and Daniel Vetter to add more descriptions. Signed-off-by: Zhao Yakui --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index d548ac0..7b10bf9 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -63,6 +63,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, i915_reg_t fence_reg_lo, fence_reg_hi; int fence_pitch_shift; u64 val; + struct drm_i915_private *dev_priv = fence->i915; if (INTEL_GEN(fence->i915) >= 6) { fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); @@ -92,9 +93,17 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, val |= I965_FENCE_REG_VALID; } - if (!pipelined) { - struct drm_i915_private *dev_priv = fence->i915; - + if (intel_vgpu_active(dev_priv)) { + /* Use the 64-bit RW to write fence reg on VGPU mode. +* The GVT-g can trap the written val of VGPU to program the +* fence reg. And the fence write in gvt-g follows the +* sequence of off/read/double-write/read. This assures that +* the fence reg is configured correctly. +* At the same time the 64-bit op can help to reduce the num +* of VGPU trap for the fence reg. +*/ + I915_WRITE64_FW(fence_reg_lo, val); + } else { /* To w/a incoherency with non-atomic 64-bit register updates, * we split the 64-bit update into two 32-bit writes. 
In order * for a partial fence not to be evaluated between writes, we -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU
>-Original Message- >From: Daniel Vetter [mailto:daniel.vet...@ffwll.ch] On Behalf Of Daniel Vetter >Sent: Tuesday, July 3, 2018 5:52 PM >To: Chris Wilson >Cc: Daniel Vetter ; Zhao, Yakui ; >intel-gfx@lists.freedesktop.org >Subject: Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on >VGPU > >On Tue, Jul 03, 2018 at 10:05:28AM +0100, Chris Wilson wrote: >> Quoting Daniel Vetter (2018-07-03 09:51:03) >> > On Tue, Jul 03, 2018 at 10:56:17AM +0800, Zhao Yakui wrote: >> > > On VGPU scenario the read/write operation of fence_reg will be >> > > trapped by the GVT-g. And then gvt-g follows the HW spec to write the >fence_reg. >> > > So it is unnecessary to read/write fence reg several times. This >> > > will help to reduce the unnecessary trap of fence_reg mmio operation. >> > > >> > > V1->V2: Fix one typo error of parameter when calling >> > > V1->intel_vgpu_active >> > > >> > > Signed-off-by: Zhao Yakui >> > >> > Ok this makes more sense. Except you need to put the 64bit entirely >> > into the vpgu block, with a comment explaining why this is safe >> > (since the vpgu will take care of updating fences correctly). >> >> Except, who cares? Are fence registers being rewritten that frequently >> that special casing vgpu is worth the hassle. Part of that is that you >> need to leave a hint behind in the code that (a) explains why it is >> safe after having the "here be dragons" and (b) why we care. >> >> On a more pragmatic level if fencing doesn't plateau out to steady >> state, that is a worrying amount of contention -- the actual fence >> write itself would be the least of my worries. > >I can easily imagine that with the few per-client fences vgpu hands out >rewrites are much more common. But yeah some real data would be good. >And more reasons to get mesa off of the gtt mmaps. Hi, Daniel/Chris Thanks for your comments. The fence reg is used to assure the access of Tiled surface through aperature window. 
When fence is needed, the driver helps to find one available fence reg and then configure it. After it is not used, the fence will be turned off and then be allocated for next usage. It doesn't rely on the state of fence reg. In such case we don't need to worry about the unsteady state. For the VGPU operation: The op of fence reg is trapped. Then the gvt-g will follow the trapped value to program the fence_reg. (It will turn off and then write the expected value for any trapped write op of fence reg). The trapped op in GVT-g is safe. Based on the current logic, it needs the five traps when one fence reg is configured under VGPU mode.(Three writes, two reads). If it is programmed in one 64-bit op under VGPU mode, only one trap is needed. And the GVT-g still can configure the expected fence_value. As the trap is quite heavy for VGPU, the trap time can be saved. I will put some description in the code and commit log in next version. >-Daniel >-- >Daniel Vetter >Software Engineer, Intel Corporation >http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU
>-Original Message- >From: Daniel Vetter [mailto:daniel.vet...@ffwll.ch] On Behalf Of Daniel Vetter >Sent: Tuesday, July 3, 2018 4:51 PM >To: Zhao, Yakui >Cc: intel-gfx@lists.freedesktop.org >Subject: Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on >VGPU > >On Tue, Jul 03, 2018 at 10:56:17AM +0800, Zhao Yakui wrote: >> On VGPU scenario the read/write operation of fence_reg will be trapped >> by the GVT-g. And then gvt-g follows the HW spec to write the fence_reg. >> So it is unnecessary to read/write fence reg several times. This will >> help to reduce the unnecessary trap of fence_reg mmio operation. >> >> V1->V2: Fix one typo error of parameter when calling intel_vgpu_active >> >> Signed-off-by: Zhao Yakui > >Ok this makes more sense. Except you need to put the 64bit entirely into the >vgpu block, with a comment explaining why this is safe (since the vgpu will >take care of updating fences correctly). Thanks for your comment and reply. The gvt-g will take care of how to update the fence reg. I will refine the comments and commit log. 
>-Daniel > >> --- >> drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +- >> 1 file changed, 9 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c >> b/drivers/gpu/drm/i915/i915_gem_fence_reg.c >> index d92fe03..9c97976 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c >> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c >> @@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct >> drm_i915_fence_reg *fence, >> >> if (INTEL_GEN(fence->i915) >= 6) { >> /* Use the 64-bit RW to read/write fence reg on SNB+ */ >> -I915_WRITE64_FW(fence_reg_lo, 0); >> -I915_READ64(fence_reg_lo); >> - >> -I915_WRITE64_FW(fence_reg_lo, val); >> -I915_READ64(fence_reg_lo); >> +if (intel_vgpu_active(dev_priv)) >> +I915_WRITE64_FW(fence_reg_lo, val); >> +else { >> +I915_WRITE64_FW(fence_reg_lo, 0); >> +I915_READ64(fence_reg_lo); >> + >> +I915_WRITE64_FW(fence_reg_lo, val); >> +I915_READ64(fence_reg_lo); >> +} >> } else { >> /* To w/a incoherency with non-atomic 64-bit register updates, >> * we split the 64-bit update into two 32-bit writes. In order >> -- >> 2.7.4 >> >> ___ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx > >-- >Daniel Vetter >Software Engineer, Intel Corporation >http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 1/2] drm/i915: Use 64-bit to Read/Write fence reg on SNB+
>-Original Message- >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] >Sent: Tuesday, July 3, 2018 5:01 PM >To: Daniel Vetter ; Zhao, Yakui >Cc: intel-gfx@lists.freedesktop.org >Subject: Re: [Intel-gfx] [PATCH v2 1/2] drm/i915: Use 64-bit to Read/Write >fence reg on SNB+ > >Quoting Daniel Vetter (2018-07-03 09:49:29) >> On Tue, Jul 03, 2018 at 10:56:16AM +0800, Zhao Yakui wrote: >> > Based on HW spec the fence reg on SNB+ is defined as 64-bit. Just >> > follow the b-spec to use 64-bit read/write mode. >> > >> > Signed-off-by: Zhao Yakui >> >> Please use git blame to understand why you've just re-introduced a bug >> that took months to debug. > >And there's even a very nice comment explaining exactly what the HW does >and why the double write is required. > >First rule of IT: turn it off and on again. Hi, Chris/Daniel Thanks for the detailed explanation. I checked the history of this issue. It was one commit about five years ago. Maybe the op of fence reg on HW doesn't follow its description very strictly. Not sure whether it is changed on the latest HW. OK. Please ignore this patch as the double write is safer. >-Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 2/2] drm/i915: write fence reg only once on VGPU
On VGPU scenario the read/write operation of fence_reg will be trapped by the GVT-g. And then gvt-g follows the HW spec to write the fence_reg. So it is unnecessary to read/write fence reg several times. This will help to reduce the unnecessary trap of fence_reg mmio operation. V1->V2: Fix one typo error of parameter when calling intel_vgpu_active Signed-off-by: Zhao Yakui --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index d92fe03..9c97976 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, if (INTEL_GEN(fence->i915) >= 6) { /* Use the 64-bit RW to read/write fence reg on SNB+ */ - I915_WRITE64_FW(fence_reg_lo, 0); - I915_READ64(fence_reg_lo); - - I915_WRITE64_FW(fence_reg_lo, val); - I915_READ64(fence_reg_lo); + if (intel_vgpu_active(dev_priv)) + I915_WRITE64_FW(fence_reg_lo, val); + else { + I915_WRITE64_FW(fence_reg_lo, 0); + I915_READ64(fence_reg_lo); + + I915_WRITE64_FW(fence_reg_lo, val); + I915_READ64(fence_reg_lo); + } } else { /* To w/a incoherency with non-atomic 64-bit register updates, * we split the 64-bit update into two 32-bit writes. In order -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 0/2] drm/i915: Optimize the read/write fence_reg on SNB+
V1->V2: Fix one typo error. Zhao Yakui (2): drm/i915: Use 64-bit to Read/Write fence reg on SNB+ drm/i915: write fence reg only once on VGPU drivers/gpu/drm/i915/i915_gem_fence_reg.c | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/2] drm/i915: Use 64-bit to Read/Write fence reg on SNB+
Based on HW spec the fence reg on SNB+ is defined as 64-bit. Just follow the b-spec to use 64-bit read/write mode. Signed-off-by: Zhao Yakui --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index d548ac0..d92fe03 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -63,6 +63,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, i915_reg_t fence_reg_lo, fence_reg_hi; int fence_pitch_shift; u64 val; + struct drm_i915_private *dev_priv = fence->i915; if (INTEL_GEN(fence->i915) >= 6) { fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); @@ -92,9 +93,14 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, val |= I965_FENCE_REG_VALID; } - if (!pipelined) { - struct drm_i915_private *dev_priv = fence->i915; + if (INTEL_GEN(fence->i915) >= 6) { + /* Use the 64-bit RW to read/write fence reg on SNB+ */ + I915_WRITE64_FW(fence_reg_lo, 0); + I915_READ64(fence_reg_lo); + I915_WRITE64_FW(fence_reg_lo, val); + I915_READ64(fence_reg_lo); + } else { /* To w/a incoherency with non-atomic 64-bit register updates, * we split the 64-bit update into two 32-bit writes. In order * for a partial fence not to be evaluated between writes, we -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915: write fence reg only once on VGPU
>-Original Message- >From: Zhao, Yakui >Sent: Tuesday, July 3, 2018 10:22 AM >To: intel-gfx@lists.freedesktop.org >Cc: zhen...@linux.intel.com; Zhao, Yakui >Subject: [PATCH 2/2] drm/i915: write fence reg only once on VGPU > >On VGPU scenario the read/write operation of fence_reg will be trapped by >the GVT-g. And then gvt-g follows the HW spec to write the fence_reg. >So it is unnecessary to read/write fence reg several times. This will help to >reduce the unnecessary trap of fence_reg mmio operation. > Sorry for one typo. The V2 will be sent. >Signed-off-by: Zhao Yakui >--- > drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +- > 1 file changed, 9 insertions(+), 5 deletions(-) > >diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c >b/drivers/gpu/drm/i915/i915_gem_fence_reg.c >index d92fe03..55bf6d9 100644 >--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c >+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c >@@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct >drm_i915_fence_reg *fence, > > if (INTEL_GEN(fence->i915) >= 6) { > /* Use the 64-bit RW to read/write fence reg on SNB+ */ >- I915_WRITE64_FW(fence_reg_lo, 0); >- I915_READ64(fence_reg_lo); >- >- I915_WRITE64_FW(fence_reg_lo, val); >- I915_READ64(fence_reg_lo); >+ if (intel_vgpu_active(i915)) >+ I915_WRITE64_FW(fence_reg_lo, val); >+ else { >+ I915_WRITE64_FW(fence_reg_lo, 0); >+ I915_READ64(fence_reg_lo); >+ >+ I915_WRITE64_FW(fence_reg_lo, val); >+ I915_READ64(fence_reg_lo); >+ } > } else { > /* To w/a incoherency with non-atomic 64-bit register updates, >* we split the 64-bit update into two 32-bit writes. In order >-- >2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: write fence reg only once on VGPU
On VGPU scenario the read/write operation of fence_reg will be trapped by the GVT-g. And then gvt-g follows the HW spec to write the fence_reg. So it is unnecessary to read/write fence reg several times. This will help to reduce the unnecessary trap of fence_reg mmio operation. Signed-off-by: Zhao Yakui --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index d92fe03..55bf6d9 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, if (INTEL_GEN(fence->i915) >= 6) { /* Use the 64-bit RW to read/write fence reg on SNB+ */ - I915_WRITE64_FW(fence_reg_lo, 0); - I915_READ64(fence_reg_lo); - - I915_WRITE64_FW(fence_reg_lo, val); - I915_READ64(fence_reg_lo); + if (intel_vgpu_active(i915)) + I915_WRITE64_FW(fence_reg_lo, val); + else { + I915_WRITE64_FW(fence_reg_lo, 0); + I915_READ64(fence_reg_lo); + + I915_WRITE64_FW(fence_reg_lo, val); + I915_READ64(fence_reg_lo); + } } else { /* To w/a incoherency with non-atomic 64-bit register updates, * we split the 64-bit update into two 32-bit writes. In order -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 0/2] drm/i915: the Read/Write optimization of fence reg
Zhao Yakui (2): drm/i915: Use 64-bit to Read/Write fence reg on SNB+ drm/i915: write fence reg only once on VGPU drivers/gpu/drm/i915/i915_gem_fence_reg.c | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915: Use 64-bit to Read/Write fence reg on SNB+
Based on HW spec the fence reg on SNB+ is defined as 64-bit. Just follow the b-spec to use 64-bit read/write mode. Signed-off-by: Zhao Yakui --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index d548ac0..d92fe03 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -63,6 +63,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, i915_reg_t fence_reg_lo, fence_reg_hi; int fence_pitch_shift; u64 val; + struct drm_i915_private *dev_priv = fence->i915; if (INTEL_GEN(fence->i915) >= 6) { fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); @@ -92,9 +93,14 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, val |= I965_FENCE_REG_VALID; } - if (!pipelined) { - struct drm_i915_private *dev_priv = fence->i915; + if (INTEL_GEN(fence->i915) >= 6) { + /* Use the 64-bit RW to read/write fence reg on SNB+ */ + I915_WRITE64_FW(fence_reg_lo, 0); + I915_READ64(fence_reg_lo); + I915_WRITE64_FW(fence_reg_lo, val); + I915_READ64(fence_reg_lo); + } else { /* To w/a incoherency with non-atomic 64-bit register updates, * we split the 64-bit update into two 32-bit writes. In order * for a partial fence not to be evaluated between writes, we -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC
>-Original Message- >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] >Sent: Friday, June 22, 2018 3:37 PM >To: Zhao, Yakui ; intel-gfx@lists.freedesktop.org >Subject: RE: [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context >buffer on the platforms without LLC > >Quoting Zhao, Yakui (2018-06-22 08:29:15) >> >> >> >-Original Message- >> >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] >> >Sent: Friday, June 22, 2018 2:36 PM >> >To: Zhao, Yakui ; >> >intel-gfx@lists.freedesktop.org >> >Cc: Zhao, Yakui >> >Subject: Re: [PATCH V2] drm/i915: Use I915_MAP_WC for execlists >> >context buffer on the platforms without LLC >> > >> >Quoting Zhao Yakui (2018-06-22 07:09:10) >> >> @@ -2728,6 +2729,7 @@ populate_lr_context(struct i915_gem_context >*ctx, >> >> struct intel_engine_cs *engine, >> >> struct intel_ring *ring) { >> >> + enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB : >> >> + I915_MAP_WC; >> >> void *vaddr; >> >> u32 *regs; >> >> int ret; >> >> @@ -2738,13 +2740,12 @@ populate_lr_context(struct >i915_gem_context >> >*ctx, >> >> return ret; >> >> } >> >> >> >> - vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); >> >> + vaddr = i915_gem_object_pin_map(ctx_obj, map); >> > >> >As this uses the cpu domain and flushed afterwards, this one is >> >correct in its usage of MAP_WB. >> >> In this function the content of context state is flushed. >> >> But the function of execlists_submit_ports will update it again before >> writing >the ELSP port. >> And there is no flush. In fact after the ELSP port is written, the HW will >> start >to execute the submitted commands. > >That's a different map. Really? It is allocated in one gem obj. Will you please help to point out where to handle the different map? Thanks Yakui >-Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC
>-Original Message- >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] >Sent: Friday, June 22, 2018 2:26 PM >To: Zhao, Yakui ; intel-gfx@lists.freedesktop.org >Cc: Zhao, Yakui >Subject: Re: [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context >buffer on the platforms without LLC > >Quoting Zhao Yakui (2018-06-22 07:09:10) >> Under execlists mode the context buffer is allocated in global Gtt region. >> The I915_MAP_WB type is used to map the buffer so that the driver can >> initialize the context buffer.(Ring reg, Context Ctrl reg and so on). >> And then __context_pin is called to flush back corresponding contents. >> In fact as it also tries to update context buffer (Ring Tail offset) >> before writing the ELSP port, it has no explicit cache flsuh.Maybe it >> is handled by HW. But this is quite confusing as BXT has no LLC. So >> the WC is used to map the context buffer on the platform without LLC >> and the update of context buffer is writen into phys page directly. It >> will be safer. >> >> V1->V2: Remove the dirty flag of execlists state buffer and one minor >> typo in commit log > >The object's pages are still dirty, so why? It's not about CPU cache dirt, >here it >is about whether the pages differ from any potential swapcache. > Based on the test it seems that this patch still has some problems. More work is needed in order to change the MAP type. Maybe this buffer should be handled like intel_ring buffer. I will check it later. >I was anticipating there would be some type conflict with >engine->pinned_default_state, but that just happens to work out >correctly... so long as there is always a retirement during load and we park >before any reset. Hmm. >-Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC
>-Original Message- >From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] >Sent: Friday, June 22, 2018 2:36 PM >To: Zhao, Yakui ; intel-gfx@lists.freedesktop.org >Cc: Zhao, Yakui >Subject: Re: [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context >buffer on the platforms without LLC > >Quoting Zhao Yakui (2018-06-22 07:09:10) >> @@ -2728,6 +2729,7 @@ populate_lr_context(struct i915_gem_context *ctx, >> struct intel_engine_cs *engine, >> struct intel_ring *ring) { >> + enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB : >> + I915_MAP_WC; >> void *vaddr; >> u32 *regs; >> int ret; >> @@ -2738,13 +2740,12 @@ populate_lr_context(struct i915_gem_context >*ctx, >> return ret; >> } >> >> - vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); >> + vaddr = i915_gem_object_pin_map(ctx_obj, map); > >As this uses the cpu domain and flushed afterwards, this one is correct in its >usage of MAP_WB. In this function the content of context state is flushed. But the function of execlists_submit_ports will update it again before writing the ELSP port. And there is no flush. In fact after the ELSP port is written, the HW will start to execute the submitted commands. >-Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V2] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC
Under execlists mode the context buffer is allocated in global Gtt region. The I915_MAP_WB type is used to map the buffer so that the driver can initialize the context buffer.(Ring reg, Context Ctrl reg and so on). And then __context_pin is called to flush back corresponding contents. In fact as it also tries to update context buffer (Ring Tail offset) before writing the ELSP port, it has no explicit cache flush. Maybe it is handled by HW. But this is quite confusing as BXT has no LLC. So the WC is used to map the context buffer on the platform without LLC and the update of context buffer is written into phys page directly. It will be safer. V1->V2: Remove the dirty flag of execlists state buffer and one minor typo in commit log Signed-off-by: Zhao Yakui CC: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 10deebe..5ffd76e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1386,6 +1386,7 @@ __execlists_context_pin(struct intel_engine_cs *engine, { void *vaddr; int ret; + enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB : I915_MAP_WC; ret = execlists_context_deferred_alloc(ctx, engine, ce); if (ret) @@ -1396,7 +1397,7 @@ __execlists_context_pin(struct intel_engine_cs *engine, if (ret) goto err; - vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(ce->state->obj, map); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); goto unpin_vma; @@ -2728,6 +2729,7 @@ populate_lr_context(struct i915_gem_context *ctx, struct intel_engine_cs *engine, struct intel_ring *ring) { + enum i915_map_type map = HAS_LLC(ctx->i915) ? 
I915_MAP_WB : I915_MAP_WC; void *vaddr; u32 *regs; int ret; @@ -2738,13 +2740,12 @@ populate_lr_context(struct i915_gem_context *ctx, return ret; } - vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(ctx_obj, map); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); return ret; } - ctx_obj->mm.dirty = true; if (engine->default_state) { /* @@ -2756,7 +2757,7 @@ populate_lr_context(struct i915_gem_context *ctx, void *defaults; defaults = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); + map); if (IS_ERR(defaults)) { ret = PTR_ERR(defaults); goto err_unpin_ctx; @@ -2851,6 +2852,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; + enum i915_map_type map = HAS_LLC(dev_priv) ? I915_MAP_WB : I915_MAP_WC; /* Because we emit WA_TAIL_DWORDS there may be a disparity * between our bookkeeping in ce->ring->head and ce->ring->tail and @@ -2872,7 +2874,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) continue; reg = i915_gem_object_pin_map(ce->state->obj, - I915_MAP_WB); + map); if (WARN_ON(IS_ERR(reg))) continue; @@ -2880,7 +2882,6 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) reg[CTX_RING_HEAD+1] = 0; reg[CTX_RING_TAIL+1] = 0; - ce->state->obj->mm.dirty = true; i915_gem_object_unpin_map(ce->state->obj); intel_ring_reset(ce->ring, 0); -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Use I915_MAP_WC for execlists context buffer on the platforms without LLC
Under execlists mode the context buffer is allocated in global Gtt region. The I915_MAP_WB type is used to map the buffer so that the driver can initialize the context buffer.(Ring reg, Context Ctrl reg and so on). And then __context_pin is called to flush back corresponding contents. In fact as it also tries to update context buffer (Ring Tail offset) before writing the ELSP port, it has no explicit cache flush. Maybe it is handled by HW. But this is quite confusing as BXT has no LLC. So the WC is used to map the context buffer on the platform without LLC and the update of context buffer is written into phys page directly. It will be safer. Signed-off-by: Zhao Yakui CC: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 10deebe..a76ea83 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1386,6 +1386,7 @@ __execlists_context_pin(struct intel_engine_cs *engine, { void *vaddr; int ret; + enum i915_map_type map = HAS_LLC(ctx->i915) ? I915_MAP_WB : I915_MAP_WC; ret = execlists_context_deferred_alloc(ctx, engine, ce); if (ret) @@ -1396,7 +1397,7 @@ __execlists_context_pin(struct intel_engine_cs *engine, if (ret) goto err; - vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(ce->state->obj, map); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); goto unpin_vma; @@ -2728,6 +2729,7 @@ populate_lr_context(struct i915_gem_context *ctx, struct intel_engine_cs *engine, struct intel_ring *ring) { + enum i915_map_type map = HAS_LLC(ctx->i915) ? 
I915_MAP_WB : I915_MAP_WC; void *vaddr; u32 *regs; int ret; @@ -2738,7 +2740,7 @@ populate_lr_context(struct i915_gem_context *ctx, return ret; } - vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(ctx_obj, map); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); @@ -2756,7 +2758,7 @@ populate_lr_context(struct i915_gem_context *ctx, void *defaults; defaults = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); + map); if (IS_ERR(defaults)) { ret = PTR_ERR(defaults); goto err_unpin_ctx; -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Give proper names to MOCS entries
On 07/13/2016 06:04 PM, Deak, Imre wrote: Hi Yakui, thanks for taking a look at these, see my comment below. On ke, 2016-07-13 at 10:22 +0800, Zhao Yakui wrote: On 07/01/2016 09:40 PM, Deak, Imre wrote: The purpose for each MOCS entry isn't well defined atm. Defining these is important to remove any uncertainty about the use of these entries for example in terms of performance and GPU/CPU coherency. Suggested by Ville. CC: Rong R Yang<rong.r.y...@intel.com> CC: Yakui Zhao<yakui.z...@intel.com> CC: Ville Syrjälä<ville.syrj...@linux.intel.com> CC: Chris Wilson<ch...@chris-wilson.co.uk> Signed-off-by: Imre Deak<imre.d...@intel.com> This looks readable and meaningful after giving proper names to MOCS entry index. But not sure whether the comment of I915_MOCS_CACHE has one typo? --- drivers/gpu/drm/i915/intel_mocs.c | 13 +++-- include/uapi/drm/i915_drm.h | 24 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 927825f..86adc11 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { * end. 
*/ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - { /* 0x0009 */ + [I915_MOCS_UNCACHED] = { + /* 0x0009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0010 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_AUTO] = { /* 0x0038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0030 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x003b */ .control_value = LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - { + [I915_MOCS_UNCACHED] = { /* 0x0009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0010 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_AUTO] = { /* 0x0038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0030 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x0039 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c17d63d..a5d116f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -62,6 +62,30 @@ extern "C" { #define I915_ERROR_UEVENT"ERROR" #define I915_RESET_UEVENT"RESET" +/* + * MOCS indexes used for GPU surfaces, 
defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* +* Not cached anywhere, coherency between CPU and GPU accesses is +* guaranteed. +*/ + I915_MOCS_UNCACHED, + /* +* Cacheability and coherency controlled by the kernel automatically +* based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current +* usage of the surface (used for display scanout or not). +*/ + I915_MOCS_AUTO, + /* +* Cached in all GPU caches available on the platform. +* Coherency between CPU and GPU accesses to the surface is not +* guaranteed without extra synchronization. +*/ IMO the coherency is guaranteed without extra synchronization for the MOCS_CACHED. No. On BXT it will make the data cached in GPU caches but will not keep the data coherent between GPU and CPU without extra synchronization. For that we would need to enable snooping, but that has considerable overhead, so we turn that off in patch 2/3. On
Re: [Intel-gfx] [PATCH v3 2/3] drm/i915/bxt: Fix inadvertent CPU snooping due to incorrect MOCS config
On 07/01/2016 09:40 PM, Deak, Imre wrote: Setting a write-back cache policy in the MOCS entry definition also implies snooping, which has a considerable overhead. This is unexpected for a few reasons: - From user-space's point of view since it didn't want a coherent surface (it didn't set the buffer as such via the set caching IOCTL). - There is a separate MOCS entry field for snooping (which we never set). - This MOCS table is about caching in (e)LLC and there is no (e)LLC on BXT. There is a separate table for L3 cache control. Considering the above the current behavior of snooping looks like an unintentional side-effect of the WB setting. Changing it to be LLC-UC gets rid of the snooping without any ill-effects. For a coherent surface the application would use a separate MOCS entry at index 1 and call the set caching IOCTL to setup the PTE entries for the corresponding buffer to be snooped. In the future we could also add a new MOCS entry for coherent surfaces. This resulted in 70% improvement in synthetic texturing benchmarks. Kudos to Valtteri Rantala, Eero Tamminen and Michael T Frederick and Ville who helped to narrow the source of problem to the kernel and to the snooping behaviour in particular. With a follow-up change to adjust the 3rd entry value igt/gem_mocs_settings is passing after this change. v2: - Rebase on v2 of patch 1/2. v3: - Set the entry as LLC uncached instead of PTE-passthrough. This way we also keep snooping disabled, but we also make the cacheability/ coherency setting independent of the PTE which is managed by the kernel. 
(Chris) CC: Rong R Yang<rong.r.y...@intel.com> CC: Yakui Zhao<yakui.z...@intel.com> CC: Valtteri Rantala<valtteri.rant...@intel.com> CC: Eero Tamminen<eero.t.tammi...@intel.com> CC: Michael T Frederick<michael.t.freder...@intel.com> CC: Ville Syrjälä<ville.syrj...@linux.intel.com> CC: Chris Wilson<ch...@chris-wilson.co.uk> Signed-off-by: Imre Deak<imre.d...@intel.com> As the BXT has no LLC, setting the WB-policy will add the extra overhead. In such case the patch looks more reasonable for BXT. Add: Acked-by: Zhao Yakui <yakui.z...@intel.com> --- drivers/gpu/drm/i915/intel_mocs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index d36e609..927825f 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -149,8 +149,8 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, { - /* 0x003b */ - .control_value = LE_CACHEABILITY(LE_WB) | + /* 0x0039 */ + .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0), ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Give proper names to MOCS entries
On 07/01/2016 09:40 PM, Deak, Imre wrote: The purpose for each MOCS entry isn't well defined atm. Defining these is important to remove any uncertainty about the use of these entries for example in terms of performance and GPU/CPU coherency. Suggested by Ville. CC: Rong R YangCC: Yakui Zhao CC: Ville Syrjälä CC: Chris Wilson Signed-off-by: Imre Deak This looks readable and meaningful after giving proper names to MOCS entry index. But not sure whether the comment of I915_MOCS_CACHE has one typo? --- drivers/gpu/drm/i915/intel_mocs.c | 13 +++-- include/uapi/drm/i915_drm.h | 24 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 927825f..86adc11 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { * end. */ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - { /* 0x0009 */ + [I915_MOCS_UNCACHED] = { + /* 0x0009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0010 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_AUTO] = { /* 0x0038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0030 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x003b */ .control_value = LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - { + [I915_MOCS_UNCACHED] = { /* 0x0009 */ .control_value = LE_CACHEABILITY(LE_UC) | 
LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0010 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_AUTO] = { /* 0x0038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0030 */ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x0039 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c17d63d..a5d116f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -62,6 +62,30 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* +* Not cached anywhere, coherency between CPU and GPU accesses is +* guaranteed. +*/ + I915_MOCS_UNCACHED, + /* +* Cacheability and coherency controlled by the kernel automatically +* based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current +* usage of the surface (used for display scanout or not). +*/ + I915_MOCS_AUTO, + /* +* Cached in all GPU caches available on the platform. +* Coherency between CPU and GPU accesses to the surface is not +* guaranteed without extra synchronization. +*/ IMO the coherency is guaranteed without extra synchronization for the MOCS_CACHED. + I915_MOCS_CACHED, +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 1/3] drm/i915/gen9: Clean up MOCS table definitions
On 07/01/2016 09:40 PM, Deak, Imre wrote: Use named struct initializers for clarity. Also fix the target cache definition to reflect its role in GEN9 onwards. On GEN8 a TC value of 0 meant ELLC but on GEN9+ it means the TC and LRU controls are taken from the PTE. No functional change, igt/gem_mocs_settings still passing after this change. v2: (Chris) - Add back the hexa literals for the entries. Add note that igt/gem_mocs_settings still passes. CC: Rong R Yang<rong.r.y...@intel.com> CC: Yakui Zhao<yakui.z...@intel.com> CC: Chris Wilson<ch...@chris-wilson.co.uk> Signed-off-by: Imre Deak<imre.d...@intel.com> It is helpful to understand the MOCS table definition after cleaning up. Add: Acked-by: Zhao Yakui <yakui.z...@intel.com> Thanks Yakui --- drivers/gpu/drm/i915/intel_mocs.c | 88 +++ 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 3c1482b..d36e609 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -66,9 +66,10 @@ struct drm_i915_mocs_table { #define L3_WB 3 /* Target cache */ -#define ELLC 0 -#define LLC1 -#define LLC_ELLC 2 +#define LE_TC_PAGETABLE0 +#define LE_TC_LLC 1 +#define LE_TC_LLC_ELLC 2 +#define LE_TC_LLC_ELLC_ALT 3 /* * MOCS tables @@ -96,34 +97,67 @@ struct drm_i915_mocs_table { * end. 
*/ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - /* { 0x0009, 0x0010 } */ - { (LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(0) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC)) }, - /* { 0x0038, 0x0030 } */ - { (LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) }, - /* { 0x003b, 0x0030 } */ - { (LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) } + { /* 0x0009 */ + .control_value = LE_CACHEABILITY(LE_UC) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + + /* 0x0010 */ + .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + }, + { + /* 0x0038 */ + .control_value = LE_CACHEABILITY(LE_PAGETABLE) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + /* 0x0030 */ + .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + }, + { + /* 0x003b */ + .control_value = LE_CACHEABILITY(LE_WB) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + /* 0x0030 */ + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + }, }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - /* { 0x0009, 0x0010 } */ - { (LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(0) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC)) }, - /* { 0x0038, 0x0030 } */ - { (LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) 
| L3_CACHEABILITY(L3_WB)) }, - /* { 0x003b, 0x0030 } */ - { (LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) } + { + /* 0x0009 */ + .control_value = LE_CACHEABILITY(LE_UC) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + + /* 0x0010 */ + .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + }, + { + /* 0x0038 */ + .control_value = LE_CACHEABILITY(LE_PAGETABLE) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE
Re: [Intel-gfx] [PATCH 1/7] drm/i915: Specify bsd rings through exec flag
On Wed, 2014-12-10 at 08:55 -0700, Dave Gordon wrote: On 10/12/14 09:11, Daniel Vetter wrote: On Wed, Dec 10, 2014 at 02:18:15AM +, Gong, Zhipeng wrote: On Tue, 2014-12-09 at 10:46 +0100, Daniel Vetter wrote: On Mon, Dec 08, 2014 at 01:55:56PM -0800, Rodrigo Vivi wrote: [snip] diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e1ed85a..d9081ec 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1273,8 +1273,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, else if ((args-flags I915_EXEC_RING_MASK) == I915_EXEC_BSD) { if (HAS_BSD2(dev)) { int ring_id; - ring_id = gen8_dispatch_bsd_ring(dev, file); - ring = dev_priv-ring[ring_id]; + + switch (args-flags I915_EXEC_BSD_MASK) { + case I915_EXEC_BSD_DEFAULT: + ring_id = gen8_dispatch_bsd_ring(dev, file); + ring = dev_priv-ring[ring_id]; + break; + case I915_EXEC_BSD_RING1: + ring = dev_priv-ring[VCS]; Do we have any use-case for selecting ring1 specifically? I've thought it's only ring2 that is special? The HEVC GPU commands should be dispatched to BSD RING 1 instead of BSD RING2 as the two rings are asymmetrical. For the H264 decoding/encoding either ring is OK. Well then same arguments applies with ring2 since only ring1 is special? It's just to minimize abi and reduce the amount of rope we hand to userspace. Anyone who knows to use any of these flags is taking responsibility for doing explicit engine allocation, so why not give them all the options -- if for no other reason, more symmetry is good. Agree with Dave's point. The override flag is initiated by the SKL GT3 platform, which requires that the HEVC GPU command can only be dispatched to the BSD ring1 explicitly as the two BSD rings are not symmetric. 
And the override flag can also provide the user-space app/driver with more flexibility to explicitly determine which BSD ring should be used to dispatch video GPU command instead of kernel ping-pong mode. And it benefits the platform with two BSD rings. As an example, there could be a case where userspace knows better than the kernel how long each batch will take, and can predict an optimal allocation pattern rather than just flip-flopping. So even when a batch *can* run on either engine, there might be a reason to pick a specific one. e.g. short-1 - ring 1 short-2 - ring 1 long-1 - ring 2 short-3 - ring 1 long-2 - ring 1 because the program knows that the three short batches together will take less time than the one first long one. .Dave. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH I-g-t 1/2] Rendercopy/skl: Remove redundant field to fix GPU hang
After applying the commit(982f7eb238a0898c456e0574dee7c4507738d75f), the OUT_RELOC is updated on Broadwell and later, which is to handle the 64-bit field of gfx address internally. In such case some commands should be fixed, otherwise GPU hang will be triggered when running rendercopy. (It is already fixed on Broadwell) Signed-off-by: Zhao Yakui yakui.z...@intel.com --- lib/rendercopy_gen9.c | 4 1 file changed, 4 deletions(-) diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c index 9ff4b3a..e20a84f 100644 --- a/lib/rendercopy_gen9.c +++ b/lib/rendercopy_gen9.c @@ -397,7 +397,6 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch, GEN7_VB0_BUFFER_ADDR_MOD_EN | /* Address Modify Enable */ VERTEX_SIZE VB0_BUFFER_PITCH_SHIFT); OUT_RELOC(batch-bo, I915_GEM_DOMAIN_VERTEX, 0, offset); - OUT_BATCH(0); OUT_BATCH(3 * VERTEX_SIZE); } @@ -523,12 +522,10 @@ gen9_emit_state_base_address(struct intel_batchbuffer *batch) { /* surface */ OUT_RELOC(batch-bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(0); /* dynamic */ OUT_RELOC(batch-bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(0); /* indirect */ OUT_BATCH(0); @@ -536,7 +533,6 @@ gen9_emit_state_base_address(struct intel_batchbuffer *batch) { /* instruction */ OUT_RELOC(batch-bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(0); /* general state buffer size */ OUT_BATCH(0xf000 | 1); -- 1.8.5.dirty ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH I-g-t 2/2] Mediafill/skl: Remove redundant field to fix GPU hang
After applying the commit(982f7eb238a0898c456e0574dee7c4507738d75f), the OUT_RELOC is updated on Broadwell and later, which is to handle the 64-bit field of gfx address internally. In such case some commands should be fixed, otherwise GPU hang will be triggered when running gem_media_fill. (It is already fixed on Broadwell) Signed-off-by: Zhao Yakui yakui.z...@intel.com --- lib/media_fill_gen9.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c index 6c6ba89..3fd2181 100644 --- a/lib/media_fill_gen9.c +++ b/lib/media_fill_gen9.c @@ -205,12 +205,10 @@ gen9_emit_state_base_address(struct intel_batchbuffer *batch) /* surface */ OUT_RELOC(batch-bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(0); /* dynamic */ OUT_RELOC(batch-bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(0); /* indirect */ OUT_BATCH(0); @@ -218,7 +216,6 @@ gen9_emit_state_base_address(struct intel_batchbuffer *batch) /* instruction */ OUT_RELOC(batch-bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); - OUT_BATCH(0); /* general state buffer size */ OUT_BATCH(0xf000 | 1); -- 1.8.5.dirty ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Specify bsd rings through exec flag
On Tue, 2014-08-05 at 02:44 -0600, Daniel Vetter wrote: On Tue, Aug 05, 2014 at 03:54:04PM +0800, Zhipeng Gong wrote: On Broadwell GT3 we have 2 Video Command Streamers (VCS), but userspace has no control when using VCS1 or VCS2. This patch introduces a mechanism to avoid the default ping-pong mode and use one specific ring through execution flag. Signed-off-by: Zhipeng Gong zhipeng.g...@intel.com This needs an open-source user and proper justification why we need this. On bdw. Iirc the only user is content protection which isn't open-source due to the usual concerns, so if that hasn't changed this patch is rejected. Hi, Daniel The open-source media driver also needs this feature for BDW. Now we are planning to add the following function that depends on this flag for BDW with two BSD rings. After the GPU hang occurs on BSD ring during decoding, it needs to specify the corresponding BSD ring to read the decoding status registers related with the BSD ring. Can this be regarded as one open-source usage scenario? Also you'd need igt tests and all that too. I agree. The igt test is needed. Maybe one patch is missing that exposes the flag of dual BSD rings. Only when the flag exists, we can specify which bsd ring to dispatch the BSD video command. 
-Daniel --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +-- include/uapi/drm/i915_drm.h| 8 +++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 60998fc..f9ed8e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1279,8 +1279,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, else if ((args-flags I915_EXEC_RING_MASK) == I915_EXEC_BSD) { if (HAS_BSD2(dev)) { int ring_id; - ring_id = gen8_dispatch_bsd_ring(dev, file); - ring = dev_priv-ring[ring_id]; + + switch (args-flags I915_EXEC_BSD_MASK) { + case I915_EXEC_BSD_DEFAULT: + ring_id = gen8_dispatch_bsd_ring(dev, file); + ring = dev_priv-ring[ring_id]; + break; + case I915_EXEC_BSD_RING1: + ring = dev_priv-ring[VCS]; + break; + case I915_EXEC_BSD_RING2: + ring = dev_priv-ring[VCS2]; + break; + default: + DRM_DEBUG(execbuf with unknown bsd ring: %d\n, + (int)(args-flags I915_EXEC_BSD_MASK)); + return -EINVAL; + } } else ring = dev_priv-ring[VCS]; } else diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ff57f07..421420a 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -736,7 +736,13 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_HANDLE_LUT (112) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT1) +/** Used for switching BSD rings on the platforms with two BSD rings */ +#define I915_EXEC_BSD_MASK (313) +#define I915_EXEC_BSD_DEFAULT (013) /* default ping-pong mode */ +#define I915_EXEC_BSD_RING1(113) +#define I915_EXEC_BSD_RING2(213) + +#define __I915_EXEC_UNKNOWN_FLAGS -(115) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 2.0.3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/3] drm/i915: Introduce dual_bsd_ring parameter.
On Mon, 2014-06-30 at 10:51 -0600, Rodrigo Vivi wrote: On Broadwell GT3 we have 2 Video Command Streamers (VCS), but userspace has no control when using VCS1 or VCS2. So we cannot test, validate or debug specific changes or workaround that might affect only one or another ring. So this patch introduces a mechanism to avoid the ping-pong selection and use one specific ring given at boot time. If it is mainly used for the test/validation, can we add one override flag so that the user-space app can explicitly declare which BSD ring is used to dispatch the corresponding BSD commands? In such case it will force to dispatch the corresponding commands on the ring passed by user-application. At the same time this patch is not helpful under the following scenario. For example: One application hopes to use the BSD Ring 0 while another application hopes to use the BSD ring 1. Signed-off-by: Rodrigo Vivi rodrigo.v...@intel.com --- drivers/gpu/drm/i915/i915_drv.h| 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 ++ drivers/gpu/drm/i915/i915_params.c | 6 ++ 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8cea596..7b6614f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2069,6 +2069,7 @@ struct i915_params { int panel_ignore_lid; unsigned int powersave; int semaphores; + int dual_bsd_ring; unsigned int lvds_downclock; int lvds_channel_mode; int panel_use_ssc; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d815ef5..09f350e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1035,26 +1035,32 @@ static int gen8_dispatch_bsd_ring(struct drm_device *dev, { struct drm_i915_private *dev_priv = dev-dev_private; struct drm_i915_file_private *file_priv = file-driver_priv; + int ring_id; + int dual = i915.dual_bsd_ring; /* Check whether the 
file_priv is using one ring */ if (file_priv-bsd_ring) return file_priv-bsd_ring-id; - else { - /* If no, use the ping-pong mechanism to select one ring */ - int ring_id; - mutex_lock(dev-struct_mutex); - if (dev_priv-mm.bsd_ring_dispatch_index == 0) { - ring_id = VCS; - dev_priv-mm.bsd_ring_dispatch_index = 1; - } else { - ring_id = VCS2; - dev_priv-mm.bsd_ring_dispatch_index = 0; - } - file_priv-bsd_ring = dev_priv-ring[ring_id]; - mutex_unlock(dev-struct_mutex); - return ring_id; + /* If no, use the parameter defined or ping-pong mechanism + * to select one ring */ + mutex_lock(dev-struct_mutex); + + if (dual == 1 || (dual != 2 + dev_priv-mm.bsd_ring_dispatch_index == 0)) { + ring_id = VCS; + dev_priv-mm.bsd_ring_dispatch_index = 1; + } else { + ring_id = VCS2; + dev_priv-mm.bsd_ring_dispatch_index = 0; } + + file_priv-bsd_ring = dev_priv-ring[ring_id]; + mutex_unlock(dev-struct_mutex); + + WARN(dual, Forcibly trying to use only one bsd ring. Using: %s\n, + file_priv-bsd_ring-name); + return ring_id; } static struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 8145729..d4871c8 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -29,6 +29,7 @@ struct i915_params i915 __read_mostly = { .panel_ignore_lid = 1, .powersave = 1, .semaphores = -1, + .dual_bsd_ring = 0, .lvds_downclock = 0, .lvds_channel_mode = 0, .panel_use_ssc = -1, @@ -70,6 +71,11 @@ MODULE_PARM_DESC(semaphores, Use semaphores for inter-ring sync (default: -1 (use per-chip defaults))); +module_param_named(dual_bsd_ring, i915.dual_bsd_ring, int, 0600); +MODULE_PARM_DESC(dual_bsd_ring, + Specify bds rings for VCS when there are multiple VCSs available. + (0=All available bsd rings [default], 1=only VCS1, 2=only VCS2)); + module_param_named(enable_rc6, i915.enable_rc6, int, 0400); MODULE_PARM_DESC(enable_rc6, Enable power-saving render C-state 6. 
___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Ringbuffer signal func for the second BSD ring
On Mon, 2014-05-12 at 13:04 -0600, Daniel Vetter wrote: On Fri, May 09, 2014 at 01:44:59PM +0100, oscar.ma...@intel.com wrote: From: Oscar Mateo oscar.ma...@intel.com This is missing in: commit 78325f2d270897c9ee0887125b7abb963eb8efea Author: Ben Widawsky benjamin.widaw...@intel.com Date: Tue Apr 29 14:52:29 2014 -0700 drm/i915: Virtualize the ringbuffer signal func Looks to me like a rebase side-effect... Signed-off-by: Oscar Mateo oscar.ma...@intel.com Queued for -next, thanks for the patch. Iirc there's been a regression report too. Anyone know the bugzilla? It seems that this patch can fix the issue in: https://bugs.freedesktop.org/show_bug.cgi?id=78274 Thanks. Yakui -Daniel --- drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9907d66..203fa2b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2182,6 +2182,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) ring-dispatch_execbuffer = gen8_ring_dispatch_execbuffer; ring-semaphore.sync_to = gen6_ring_sync; + ring-semaphore.signal = gen6_signal; /* * The current semaphore is only applied on the pre-gen8. And there * is no bsd2 ring on the pre-gen8. So now the semaphore_register -- 1.9.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] tests: Add gem_exec_params
On Wed, 2014-04-23 at 12:32 -0600, Daniel Vetter wrote: This fills all the gaps we've had in our execbuf testing. Overflow testing of the various arrays is already done by gem_reloc_overflow. Also add kms_flip_tiling to .gitignore. This will cause a bunch of failures since current kernels don't catch all fallout. Very good patch. Except some small concerns, it is OK to me. Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch --- tests/.gitignore| 2 + tests/Makefile.sources | 1 + tests/gem_exec_params.c | 212 3 files changed, 215 insertions(+) create mode 100644 tests/gem_exec_params.c diff --git a/tests/.gitignore b/tests/.gitignore index 146bab06b565..4c50bae93aa3 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -35,6 +35,7 @@ gem_exec_blt gem_exec_faulting_reloc gem_exec_lut_handle gem_exec_nop +gem_exec_params gem_exec_parse gem_fd_exhaustion gem_fenced_exec_thrash @@ -113,6 +114,7 @@ kms_addfb kms_cursor_crc kms_fbc_crc kms_flip +kms_flip_tiling kms_pipe_crc_basic kms_plane kms_render diff --git a/tests/Makefile.sources b/tests/Makefile.sources index c957ace2ace0..9b2d7cff1113 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -29,6 +29,7 @@ TESTS_progs_M = \ gem_exec_bad_domains \ gem_exec_faulting_reloc \ gem_exec_nop \ + gem_exec_params \ gem_exec_parse \ gem_fenced_exec_thrash \ gem_fence_thrash \ diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c new file mode 100644 index ..b1d996c530f5 --- /dev/null +++ b/tests/gem_exec_params.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Daniel Vetter + * + */ + +#include unistd.h +#include stdlib.h +#include stdint.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/ioctl.h +#include sys/time.h +#include drm.h + +#include ioctl_wrappers.h +#include drmtest.h +#include intel_io.h +#include intel_chipset.h +#include igt_aux.h + +#define LOCAL_I915_EXEC_VEBOX (40) + +struct drm_i915_gem_execbuffer2 execbuf; +struct drm_i915_gem_exec_object2 gem_exec[1]; +uint32_t batch[2] = {MI_BATCH_BUFFER_END}; +uint32_t handle, devid; +int fd; + +igt_main +{ + igt_fixture { + fd = drm_open_any(); + + devid = intel_get_drm_devid(fd); + + handle = gem_create(fd, 4096); + gem_write(fd, handle, 0, batch, sizeof(batch)); + + gem_exec[0].handle = handle; + gem_exec[0].relocation_count = 0; + gem_exec[0].relocs_ptr = 0; + gem_exec[0].alignment = 0; + gem_exec[0].offset = 0; + gem_exec[0].flags = 0; + gem_exec[0].rsvd1 = 0; + gem_exec[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t)gem_exec; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len = 8; Can we use the sizeof(batch) instead of 8? 
+ execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = 0; + i915_execbuffer2_set_context_id(execbuf, 0); + execbuf.rsvd2 = 0; + } + + igt_subtest(control) { + igt_assert(drmIoctl(fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + execbuf) == 0); + execbuf.flags = I915_EXEC_RENDER; + igt_assert(drmIoctl(fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, +
Re: [Intel-gfx] [PATCH] tests: Add gem_exec_params
On Thu, 2014-04-24 at 01:18 -0600, Daniel Vetter wrote: On Thu, Apr 24, 2014 at 8:43 AM, Zhao Yakui yakui.z...@intel.com wrote: On Wed, 2014-04-23 at 12:32 -0600, Daniel Vetter wrote: This fills all the gaps we've had in our execbuf testing. Overflow testing of the various arrays is already done by gem_reloc_overflow. Also add kms_flip_tiling to .gitignore. This will cause a bunch of failures since current kernels don't catch all fallout. Very good patch. Except some small concerns, it is OK to me. Thanks for your comments, replies below. -Daniel Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch --- tests/.gitignore| 2 + tests/Makefile.sources | 1 + tests/gem_exec_params.c | 212 3 files changed, 215 insertions(+) create mode 100644 tests/gem_exec_params.c diff --git a/tests/.gitignore b/tests/.gitignore index 146bab06b565..4c50bae93aa3 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -35,6 +35,7 @@ gem_exec_blt gem_exec_faulting_reloc gem_exec_lut_handle gem_exec_nop +gem_exec_params gem_exec_parse gem_fd_exhaustion gem_fenced_exec_thrash @@ -113,6 +114,7 @@ kms_addfb kms_cursor_crc kms_fbc_crc kms_flip +kms_flip_tiling kms_pipe_crc_basic kms_plane kms_render diff --git a/tests/Makefile.sources b/tests/Makefile.sources index c957ace2ace0..9b2d7cff1113 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -29,6 +29,7 @@ TESTS_progs_M = \ gem_exec_bad_domains \ gem_exec_faulting_reloc \ gem_exec_nop \ + gem_exec_params \ gem_exec_parse \ gem_fenced_exec_thrash \ gem_fence_thrash \ diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c new file mode 100644 index ..b1d996c530f5 --- /dev/null +++ b/tests/gem_exec_params.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, 
copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Daniel Vetter + * + */ + +#include unistd.h +#include stdlib.h +#include stdint.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/ioctl.h +#include sys/time.h +#include drm.h + +#include ioctl_wrappers.h +#include drmtest.h +#include intel_io.h +#include intel_chipset.h +#include igt_aux.h + +#define LOCAL_I915_EXEC_VEBOX (40) + +struct drm_i915_gem_execbuffer2 execbuf; +struct drm_i915_gem_exec_object2 gem_exec[1]; +uint32_t batch[2] = {MI_BATCH_BUFFER_END}; +uint32_t handle, devid; +int fd; + +igt_main +{ + igt_fixture { + fd = drm_open_any(); + + devid = intel_get_drm_devid(fd); + + handle = gem_create(fd, 4096); + gem_write(fd, handle, 0, batch, sizeof(batch)); + + gem_exec[0].handle = handle; + gem_exec[0].relocation_count = 0; + gem_exec[0].relocs_ptr = 0; + gem_exec[0].alignment = 0; + gem_exec[0].offset = 0; + gem_exec[0].flags = 0; + gem_exec[0].rsvd1 = 0; + gem_exec[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t)gem_exec; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len 
= 8; Can we use the sizeof(batch) instead of 8? We use noop batches like this all over the place and it's kinda all hard-coded magic numbers. Constructing execbufs manually is one of those areas in igt which are rather painful, but thus far I just didn't come up with a nice approach to it. Hence I think leaving all the brittle magic
Re: [Intel-gfx] [PATCH V4 3/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine
On Thu, 2014-04-24 at 09:21 -0600, Daniel Vetter wrote: On Thu, Apr 17, 2014 at 10:37:37AM +0800, Zhao Yakui wrote: Based on the hardware spec, the BDW GT3 machine has two independent BSD rings that can be used to dispatch the video commands. So just initialize it. V3-V4: Follow Imre's comment to do some minor updates. For example: more comments are added to describe the semaphore between rings. Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- [snip] +/** + * Initialize the second BSD ring for Broadwell GT3. + * It is noted that this only exists on Broadwell GT3. + */ +int intel_init_bsd2_ring_buffer(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring = &dev_priv->ring[VCS2]; + + if ((INTEL_INFO(dev)->gen != 8) ) { Checkpatch complained about the space here, I've fixed it up quickly. Running checkpatch before submitting patches is highly recommended so that we all nicely follow the official linux kernel Coding Style. Thanks for the suggestion. I will pay attention to this next time. Thanks. Yakui Cheers, Daniel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH V4 3/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine
On Tue, 2014-04-22 at 13:52 -0600, Daniel Vetter wrote: On Thu, Apr 17, 2014 at 10:37:37AM +0800, Zhao Yakui wrote: Based on the hardware spec, the BDW GT3 machine has two independent BSD ring that can be used to dispatch the video commands. So just initialize it. V3-V4: Follow Imre's comment to do some minor updates. For example: more comments are added to describe the semaphore between ring. Within a patch series we usually keep revisions for each patch separately, so this would only be v2 for this patch. Once a patch is merge people won't ever look at it in context of your entire series, but just as an individual patch. If your in-patch commit log directly jumps to v4 from v1 then people are left wondering what happened to v2 and v3 ;-) Anyway just a small nit for the next patch series. Good advice. I will pay attention to this next time. Thanks. Yakui -Daniel Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c |4 +- drivers/gpu/drm/i915/i915_drv.h |2 + drivers/gpu/drm/i915/i915_gem.c |9 +++- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 78 +++ drivers/gpu/drm/i915/intel_ringbuffer.h |4 +- 7 files changed, 95 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 17fbbe5..2a7842b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -282,7 +282,7 @@ static const struct intel_device_info intel_broadwell_m_info = { static const struct intel_device_info intel_broadwell_gt3d_info = { .gen = 8, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, @@ -292,7 +292,7 @@ static const struct intel_device_info intel_broadwell_gt3d_info = { static const struct 
intel_device_info intel_broadwell_gt3m_info = { .gen = 8, .is_mobile = 1, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 92c3095..74aef6a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table { #define BSD_RING (1VCS) #define BLT_RING (1BCS) #define VEBOX_RING (1VECS) +#define BSD2_RING (1VCS2) #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask BSD_RING) +#define HAS_BSD2(dev) (INTEL_INFO(dev)-ring_mask BSD2_RING) #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask BLT_RING) #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask VEBOX_RING) #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85c9cf0..65c441c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev) goto cleanup_blt_ring; } + if (HAS_BSD2(dev)) { + ret = intel_init_bsd2_ring_buffer(dev); + if (ret) + goto cleanup_vebox_ring; + } ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); if (ret) - goto cleanup_vebox_ring; + goto cleanup_bsd2_ring; return 0; +cleanup_bsd2_ring: + intel_cleanup_ring_buffer(dev_priv-ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(dev_priv-ring[VECS]); cleanup_blt_ring: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4865ade..282164c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -42,6 +42,7 @@ static const char *ring_str(int ring) case VCS: return bsd; case BCS: return blt; case VECS: return vebox; + case VCS2: return bsd2; default: return ; } } diff --git 
a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8f84555..0b88508 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -760,6 +760,7 @@ enum punit_power_well { #define RENDER_RING_BASE 0x02000 #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 +#define
[Intel-gfx] [PATCH I-g-t V4 0/2] Tests: Add test cases based on multi drm_fd to test sync
This follows Daniel's advice to add the two test cases based on multi drm_fd to test the ring sync and CPU-GPU sync. The Broadwell GT3 machine has two independent BSD rings that can be used to process the video commands. This is implemented in kernel driver and transparent to the user-space. But we still need to check the ring sync and CPU-GPU sync for the second BSD ring. Two tests are created based on the multi drm_fds to test the sync. Multi drm_fd can assure that the second BSD ring has the opportunity to dispatch the GPU command. V1-V2: Follow Daniel's comment to add one subtest instead of one individual test case, which is used to test the CPU-GPU sync under multi BSD rings. V2-V3: Follow Imre's comment to remove the unnecessary initialization and use igt_assert_f instead of igt_assert Zhao Yakui (2): tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings tests/.gitignore|1 + tests/Makefile.sources |1 + tests/gem_dummy_reloc_loop.c| 107 +++- tests/gem_multi_bsd_sync_loop.c | 175 +++ 4 files changed, 283 insertions(+), 1 deletion(-) create mode 100644 tests/gem_multi_bsd_sync_loop.c -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH I-g-t 2/2] tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings
The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the CPU-GPU sync for the second BSD ring. V1-V2: Follow Daniel's comment to add one subtext instead of one individual test case, which is used to test the CPU-GPU sync under multi BSD rings. V2-V3: Follow Imre's comment to remove the unnecessary initialization and use igt_assert_f instead of igt_assert Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/gem_dummy_reloc_loop.c | 107 +- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c index a61b59b..4e4dd49 100644 --- a/tests/gem_dummy_reloc_loop.c +++ b/tests/gem_dummy_reloc_loop.c @@ -48,6 +48,13 @@ static drm_intel_bufmgr *bufmgr; struct intel_batchbuffer *batch; static drm_intel_bo *target_buffer; +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + /* * Testcase: Basic check of ring-cpu sync using a dummy reloc * @@ -124,6 +131,50 @@ dummy_reloc_loop_random_ring(int num_rings) } } +static void +dummy_reloc_loop_random_ring_multi_fd(int num_rings) +{ + int i; + struct intel_batchbuffer *saved_batch; + + saved_batch = batch; + + srandom(0xdeadbeef); + + for (i = 0; i 0x10; i++) { + int mindex; + int ring = random() % num_rings + 1; + + mindex = random() % NUM_FD; + batch = mbatch[mindex]; + + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_BATCH(0); /* reserved */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 
0); + OUT_BATCH(MI_NOOP | (122) | (0xf)); + ADVANCE_BATCH(); + } + intel_batchbuffer_flush_on_ring(batch, ring); + + drm_intel_bo_map(target_buffer, 0); + // map to force waiting on rendering + drm_intel_bo_unmap(target_buffer); + } + + batch = saved_batch; +} + int fd; int devid; int num_rings; @@ -133,6 +184,7 @@ igt_main igt_skip_on_simulation(); igt_fixture { + int i; fd = drm_open_any(); devid = intel_get_drm_devid(fd); num_rings = gem_get_num_rings(fd); @@ -148,6 +200,40 @@ igt_main target_buffer = drm_intel_bo_alloc(bufmgr, target bo, 4096, 4096); igt_assert(target_buffer); + + /* Create multi drm_fd and map one gem object to multi gem_contexts */ + { + unsigned int target_flink; + char buffer_name[32]; + if (dri_bo_flink(target_buffer, target_flink)) { + printf(fail to get flink for target buffer\n); + igt_assert_f(0, fail to create global +gem_handle for target buffer\n); + } + for (i = 0; i NUM_FD; i++) { + sprintf(buffer_name, Target buffer %d\n, i); + mfd[i] = drm_open_any(); + mbufmgr[i] = drm_intel_bufmgr_gem_init(mfd[i], 4096); + igt_assert_f(mbufmgr[i], +fail to initialize buf manager +for drm_fd %d\n, +mfd[i]); + drm_intel_bufmgr_gem_enable_reuse(mbufmgr[i]); + mbatch[i] = intel_batchbuffer_alloc(mbufmgr[i], devid); + igt_assert_f(mbatch[i], +fail to create batchbuffer +for drm_fd %d\n, +mfd[i]); + mbuffer[i] = intel_bo_gem_create_from_name( + mbufmgr[i
Re: [Intel-gfx] [PATCH I-g-t V4 0/2] Tests: Add test cases based on multi drm_fd to test sync
On Wed, 2014-04-23 at 20:02 -0600, Zhao, Yakui wrote: It seems that the patch 01 was filtered out. So I will try to resend it. Thanks. Yakui This follows Daniel's advice to add the two test cases based on multi drm_fd to test the ring sync and CPU-GPU sync. The Broadwell GT3 machine has two independent BSD rings that can be used to process the video commands. This is implemented in kernel driver and transparent to the user-space. But we still need to check the ring sync and CPU-GPU sync for the second BSD ring. Two tests are created based on the multi drm_fds to test the sync. Multi drm_fd can assure that the second BSD ring has the opportunity to dispatch the GPU command. V1-V2: Follow Daniel's comment to add one subtest instead of one individual test case, which is used to test the CPU-GPU sync under multi BSD rings. V2-V3: Follow Imre's comment to remove the unnecessary initialization and use igt_assert_f instead of igt_assert Zhao Yakui (2): tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings tests/.gitignore|1 + tests/Makefile.sources |1 + tests/gem_dummy_reloc_loop.c| 107 +++- tests/gem_multi_bsd_sync_loop.c | 175 +++ 4 files changed, 283 insertions(+), 1 deletion(-) create mode 100644 tests/gem_multi_bsd_sync_loop.c ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH I-g-t V4 0/2] Tests: Add test cases based on multi drm_fd to test sync
This follows Daniel's advice to add the two test cases based on multi drm_fd to test the ring sync and CPU-GPU sync. The Broadwell GT3 machine has two independent BSD rings that can be used to process the video commands. This is implemented in kernel driver and transparent to the user-space. But we still need to check the ring sync and CPU-GPU sync for the second BSD ring. Two tests are created based on the multi drm_fds to test the sync. Multi drm_fd can assure that the second BSD ring has the opportunity to dispatch the GPU command. V1-V2: Follow Daniel's comment to add one subtest instead of one individual test case, which is used to test the CPU-GPU sync under multi BSD rings. V2-V3: Follow Imre's comment to remove the unnecessary initialization and use igt_assert_f instead of igt_assert V3-V4: Add gem_multi_bsd_sync_loop.c into the tests/.gitignore Zhao Yakui (2): tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings tests/.gitignore|1 + tests/Makefile.sources |1 + tests/gem_dummy_reloc_loop.c| 107 +++- tests/gem_multi_bsd_sync_loop.c | 175 +++ 4 files changed, 283 insertions(+), 1 deletion(-) create mode 100644 tests/gem_multi_bsd_sync_loop.c -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH I-g-t V4 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings
The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the ring sync between the two BSD rings. At the same time it also needs to check the sync among the second BSD ring and the other rings. V2-V3: Follow Imre's comment to remove the unnecessary initialization and use igt_assert_f instead of igt_assert. V3-V4: Add gem_multi_bsd_sync_loop.c into the tests/.gitignore Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/.gitignore|1 + tests/Makefile.sources |1 + tests/gem_multi_bsd_sync_loop.c | 175 +++ 3 files changed, 177 insertions(+) create mode 100644 tests/gem_multi_bsd_sync_loop.c diff --git a/tests/.gitignore b/tests/.gitignore index 146bab0..42690dd 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -54,6 +54,7 @@ gem_media_fill gem_mmap gem_mmap_gtt gem_mmap_offset_exhaustion +gem_multi_bsd_sync_loop gem_non_secure_batch gem_partial_pwrite_pread gem_persistent_relocs diff --git a/tests/Makefile.sources b/tests/Makefile.sources index c957ace..7cd9ca8 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -105,6 +105,7 @@ TESTS_progs = \ gem_render_tiled_blits \ gem_ring_sync_copy \ gem_ring_sync_loop \ + gem_multi_bsd_sync_loop \ gem_seqno_wrap \ gem_set_tiling_vs_gtt \ gem_set_tiling_vs_pwrite \ diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c new file mode 100644 index 000..b01764a --- /dev/null +++ b/tests/gem_multi_bsd_sync_loop.c @@ -0,0 +1,175 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to 
whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_ring_sync_loop_*.c) + *Zhao Yakui yakui.z...@intel.com + * + */ + +#include stdlib.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/time.h +#include drm.h +#include ioctl_wrappers.h +#include drmtest.h +#include intel_bufmgr.h +#include intel_batchbuffer.h +#include intel_io.h +#include i830_reg.h +#include intel_chipset.h + +static drm_intel_bufmgr *bufmgr; +struct intel_batchbuffer *batch; +static drm_intel_bo *target_buffer; + +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + + +/* + * Testcase: Basic check of ring-ring sync using a dummy reloc + * + * Extremely efficient at catching missed irqs with semaphores=0 ... 
+ */ + +#define MI_COND_BATCH_BUFFER_END (0x3623 | 1) +#define MI_DO_COMPARE (121) + +static void +store_dword_loop(int fd) +{ + int i; + int num_rings = gem_get_num_rings(fd); + + srandom(0xdeadbeef); + + for (i = 0; i SLOW_QUICK(0x10, 10); i++) { + int ring, mindex; + ring = random() % num_rings + 1; + mindex = random() % NUM_FD; + batch = mbatch[mindex]; + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_BATCH(0); /* reserved */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER
Re: [Intel-gfx] [PATCH I-g-t V2 2/2] tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings
On Tue, 2014-04-22 at 13:48 -0600, Daniel Vetter wrote: On Tue, Apr 22, 2014 at 03:05:03PM +0300, Imre Deak wrote: On Tue, 2014-04-15 at 10:38 +0800, Zhao Yakui wrote: The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the CPU-GPU sync for the second BSD ring. V1-V2: Follow Daniel's comment to add one subtext instead of one individual test case, which is used to test the CPU-GPU sync under multi BSD rings Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/gem_dummy_reloc_loop.c | 102 +- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c index a61b59b..660d8e1 100644 --- a/tests/gem_dummy_reloc_loop.c +++ b/tests/gem_dummy_reloc_loop.c @@ -48,6 +48,13 @@ static drm_intel_bufmgr *bufmgr; struct intel_batchbuffer *batch; static drm_intel_bo *target_buffer; +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + /* * Testcase: Basic check of ring-cpu sync using a dummy reloc * @@ -124,6 +131,50 @@ dummy_reloc_loop_random_ring(int num_rings) } } +static void +dummy_reloc_loop_random_ring_multi_fd(int num_rings) +{ + int i; + struct intel_batchbuffer *saved_batch; + + saved_batch = batch; + + srandom(0xdeadbeef); + + for (i = 0; i 0x10; i++) { + int mindex; + int ring = random() % num_rings + 1; + + mindex = random() % NUM_FD; + batch = mbatch[mindex]; + + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_BATCH(0); /* reserved */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + 
I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP | (122) | (0xf)); + ADVANCE_BATCH(); + } + intel_batchbuffer_flush_on_ring(batch, ring); + + drm_intel_bo_map(target_buffer, 0); + // map to force waiting on rendering + drm_intel_bo_unmap(target_buffer); + } + + batch = saved_batch; +} + int fd; int devid; int num_rings; @@ -133,6 +184,7 @@ igt_main igt_skip_on_simulation(); igt_fixture { + int i; fd = drm_open_any(); devid = intel_get_drm_devid(fd); num_rings = gem_get_num_rings(fd); @@ -148,6 +200,35 @@ igt_main target_buffer = drm_intel_bo_alloc(bufmgr, target bo, 4096, 4096); igt_assert(target_buffer); + + /* Create multi drm_fd and map one gem object to multi gem_contexts */ + { + unsigned int target_flink; + char buffer_name[32]; + if (dri_bo_flink(target_buffer, target_flink)) { + printf(fail to get flink for target buffer\n); + igt_assert(0); For the future: could be just igt_assert_f(). Yeah I think for new testcases we should try to use the latest igt_* macros and helpers as much as possible. Reducing control flow and replacing it by the right igt_assert/require/... macro imo really helps the readability of testcases. Hi, Daniel/Imre Thanks for your comments and advice. I will update it. Thanks. Yakui -Daniel + } + for (i = 0; i NUM_FD; i++) { + mfd[i] = 0; + mbufmgr[i] = NULL; + mbuffer[i] = NULL; + } Nitpick: the above are all statics, so no need to init them. Other than the above this looks good: Reviewed-by: Imre Deak imre.d...@intel.com + for (i = 0; i NUM_FD; i++) { + sprintf(buffer_name, Target buffer %d\n, i); + mfd[i] = drm_open_any(); + mbufmgr[i] = drm_intel_bufmgr_gem_init(mfd[i], 4096); + igt_assert(mbufmgr[i]); + drm_intel_bufmgr_gem_enable_reuse(mbufmgr[i
Re: [Intel-gfx] [PATCH I-g-t V2 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings
On Tue, 2014-04-22 at 13:44 -0600, Daniel Vetter wrote: On Tue, Apr 22, 2014 at 02:52:04PM +0300, Imre Deak wrote: On Tue, 2014-04-15 at 10:38 +0800, Zhao Yakui wrote: The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the ring sync between the two BSD rings. At the same time it also needs to check the sync among the second BSD ring and the other rings. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/Makefile.sources |1 + tests/gem_multi_bsd_sync_loop.c | 172 +++ 2 files changed, 173 insertions(+) create mode 100644 tests/gem_multi_bsd_sync_loop.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index c957ace..7cd9ca8 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -105,6 +105,7 @@ TESTS_progs = \ gem_render_tiled_blits \ gem_ring_sync_copy \ gem_ring_sync_loop \ + gem_multi_bsd_sync_loop \ gem_seqno_wrap \ gem_set_tiling_vs_gtt \ gem_set_tiling_vs_pwrite \ diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c new file mode 100644 index 000..7f5b832 --- /dev/null +++ b/tests/gem_multi_bsd_sync_loop.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_ring_sync_loop_*.c) + *Zhao Yakui yakui.z...@intel.com + * + */ + +#include stdlib.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/time.h +#include drm.h +#include ioctl_wrappers.h +#include drmtest.h +#include intel_bufmgr.h +#include intel_batchbuffer.h +#include intel_io.h +#include i830_reg.h +#include intel_chipset.h + +static drm_intel_bufmgr *bufmgr; +struct intel_batchbuffer *batch; +static drm_intel_bo *target_buffer; + +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + + +/* + * Testcase: Basic check of ring-ring sync using a dummy reloc + * + * Extremely efficient at catching missed irqs with semaphores=0 ... 
+ */ + +#define MI_COND_BATCH_BUFFER_END (0x3623 | 1) +#define MI_DO_COMPARE(121) + +static void +store_dword_loop(int fd) +{ + int i; + int num_rings = gem_get_num_rings(fd); + + srandom(0xdeadbeef); + + for (i = 0; i SLOW_QUICK(0x10, 10); i++) { + int ring, mindex; + ring = random() % num_rings + 1; + mindex = random() % NUM_FD; + batch = mbatch[mindex]; + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_BATCH(0); /* reserved */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP | (122) | (0xf)); + ADVANCE_BATCH
[Intel-gfx] [PATCH I-g-t V3 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings
The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the ring sync between the two BSD rings. At the same time it also needs to check the sync among the second BSD ring and the other rings. V2-V3: Follow Imre's comment to remove the unnecessary initialization and use igt_assert_f instead of igt_assert. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/Makefile.sources |1 + tests/gem_multi_bsd_sync_loop.c | 175 +++ 2 files changed, 176 insertions(+) create mode 100644 tests/gem_multi_bsd_sync_loop.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index c957ace..7cd9ca8 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -105,6 +105,7 @@ TESTS_progs = \ gem_render_tiled_blits \ gem_ring_sync_copy \ gem_ring_sync_loop \ + gem_multi_bsd_sync_loop \ gem_seqno_wrap \ gem_set_tiling_vs_gtt \ gem_set_tiling_vs_pwrite \ diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c new file mode 100644 index 000..b01764a --- /dev/null +++ b/tests/gem_multi_bsd_sync_loop.c @@ -0,0 +1,175 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_ring_sync_loop_*.c) + *Zhao Yakui yakui.z...@intel.com + * + */ + +#include stdlib.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/time.h +#include drm.h +#include ioctl_wrappers.h +#include drmtest.h +#include intel_bufmgr.h +#include intel_batchbuffer.h +#include intel_io.h +#include i830_reg.h +#include intel_chipset.h + +static drm_intel_bufmgr *bufmgr; +struct intel_batchbuffer *batch; +static drm_intel_bo *target_buffer; + +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + + +/* + * Testcase: Basic check of ring-ring sync using a dummy reloc + * + * Extremely efficient at catching missed irqs with semaphores=0 ... 
+ */ + +#define MI_COND_BATCH_BUFFER_END (0x3623 | 1) +#define MI_DO_COMPARE (121) + +static void +store_dword_loop(int fd) +{ + int i; + int num_rings = gem_get_num_rings(fd); + + srandom(0xdeadbeef); + + for (i = 0; i SLOW_QUICK(0x10, 10); i++) { + int ring, mindex; + ring = random() % num_rings + 1; + mindex = random() % NUM_FD; + batch = mbatch[mindex]; + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_BATCH(0); /* reserved */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP | (122) | (0xf)); + ADVANCE_BATCH(); + } + intel_batchbuffer_flush_on_ring(batch, ring); + } + + drm_intel_bo_map(target_buffer, 0); + // map to force waiting on rendering + drm_intel_bo_unmap(target_buffer); +} + +igt_simple_main +{ + int fd; + int devid; + int i
[Intel-gfx] [PATCH I-g-t V3 0/2] Tests: Add test cases based on multi drm_fd to test sync
This follows Daniel's advice to add two test cases based on multiple drm_fds to test the ring sync and CPU-GPU sync. The Broadwell GT3 machine has two independent BSD rings that can be used to process the video commands. This is implemented in the kernel driver and is transparent to user-space. But we still need to check the ring sync and CPU-GPU sync for the second BSD ring. Two tests are created based on multiple drm_fds to test the sync. Using multiple drm_fds ensures that the second BSD ring has the opportunity to dispatch GPU commands. V1-V2: Follow Daniel's comment to add one subtest instead of one individual test case, which is used to test the CPU-GPU sync under multi BSD rings. V2-V3: Follow Imre's comment to remove the unnecessary initialization and use igt_assert_f instead of igt_assert. Zhao Yakui (2): tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings tests/Makefile.sources |1 + tests/gem_dummy_reloc_loop.c| 107 +++- tests/gem_multi_bsd_sync_loop.c | 175 +++ 3 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 tests/gem_multi_bsd_sync_loop.c -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH I-g-t V3 2/2] tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings
The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the CPU-GPU sync for the second BSD ring. V1-V2: Follow Daniel's comment to add one subtext instead of one individual test case, which is used to test the CPU-GPU sync under multi BSD rings. V2-V3: Follow Imre's comment to remove the unnecessary initialization and use igt_assert_f instead of igt_assert Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/gem_dummy_reloc_loop.c | 107 +- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c index a61b59b..4e4dd49 100644 --- a/tests/gem_dummy_reloc_loop.c +++ b/tests/gem_dummy_reloc_loop.c @@ -48,6 +48,13 @@ static drm_intel_bufmgr *bufmgr; struct intel_batchbuffer *batch; static drm_intel_bo *target_buffer; +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + /* * Testcase: Basic check of ring-cpu sync using a dummy reloc * @@ -124,6 +131,50 @@ dummy_reloc_loop_random_ring(int num_rings) } } +static void +dummy_reloc_loop_random_ring_multi_fd(int num_rings) +{ + int i; + struct intel_batchbuffer *saved_batch; + + saved_batch = batch; + + srandom(0xdeadbeef); + + for (i = 0; i 0x10; i++) { + int mindex; + int ring = random() % num_rings + 1; + + mindex = random() % NUM_FD; + batch = mbatch[mindex]; + + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_BATCH(0); /* reserved */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 
0); + OUT_BATCH(MI_NOOP | (122) | (0xf)); + ADVANCE_BATCH(); + } + intel_batchbuffer_flush_on_ring(batch, ring); + + drm_intel_bo_map(target_buffer, 0); + // map to force waiting on rendering + drm_intel_bo_unmap(target_buffer); + } + + batch = saved_batch; +} + int fd; int devid; int num_rings; @@ -133,6 +184,7 @@ igt_main igt_skip_on_simulation(); igt_fixture { + int i; fd = drm_open_any(); devid = intel_get_drm_devid(fd); num_rings = gem_get_num_rings(fd); @@ -148,6 +200,40 @@ igt_main target_buffer = drm_intel_bo_alloc(bufmgr, target bo, 4096, 4096); igt_assert(target_buffer); + + /* Create multi drm_fd and map one gem object to multi gem_contexts */ + { + unsigned int target_flink; + char buffer_name[32]; + if (dri_bo_flink(target_buffer, target_flink)) { + printf(fail to get flink for target buffer\n); + igt_assert_f(0, fail to create global +gem_handle for target buffer\n); + } + for (i = 0; i NUM_FD; i++) { + sprintf(buffer_name, Target buffer %d\n, i); + mfd[i] = drm_open_any(); + mbufmgr[i] = drm_intel_bufmgr_gem_init(mfd[i], 4096); + igt_assert_f(mbufmgr[i], +fail to initialize buf manager +for drm_fd %d\n, +mfd[i]); + drm_intel_bufmgr_gem_enable_reuse(mbufmgr[i]); + mbatch[i] = intel_batchbuffer_alloc(mbufmgr[i], devid); + igt_assert_f(mbatch[i], +fail to create batchbuffer +for drm_fd %d\n, +mfd[i]); + mbuffer[i] = intel_bo_gem_create_from_name( + mbufmgr[i
Re: [Intel-gfx] [PATCH V3 2/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine
On Wed, 2014-04-16 at 10:23 -0600, Deak, Imre wrote: On Wed, 2014-04-16 at 10:41 +0800, Zhao Yakui wrote: Based on the hardware spec, the BDW GT3 machine has two independent BSD ring that can be used to dispatch the video commands. So just initialize it. Signed-off-by: Zhao Yakui yakui.z...@intel.com A couple of nitpicks below, with or without those: Reviewed-by: Imre Deak imre.d...@intel.com Hi, Imre Thanks for your review and the comments. I will update the patch based on your comment. --- drivers/gpu/drm/i915/i915_drv.c |4 +-- drivers/gpu/drm/i915/i915_drv.h |2 ++ drivers/gpu/drm/i915/i915_gem.c |9 +- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 54 +++ drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++- 7 files changed, 71 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 17fbbe5..2a7842b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -282,7 +282,7 @@ static const struct intel_device_info intel_broadwell_m_info = { static const struct intel_device_info intel_broadwell_gt3d_info = { .gen = 8, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, @@ -292,7 +292,7 @@ static const struct intel_device_info intel_broadwell_gt3d_info = { static const struct intel_device_info intel_broadwell_gt3m_info = { .gen = 8, .is_mobile = 1, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 92c3095..74aef6a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ 
b/drivers/gpu/drm/i915/i915_drv.h @@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table { #define BSD_RING (1VCS) #define BLT_RING (1BCS) #define VEBOX_RING (1VECS) +#define BSD2_RING (1VCS2) #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask BSD_RING) +#define HAS_BSD2(dev) (INTEL_INFO(dev)-ring_mask BSD2_RING) #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask BLT_RING) #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask VEBOX_RING) #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85c9cf0..b4dcf2a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev) goto cleanup_blt_ring; } + if (HAS_BSD2(dev)) { + ret = intel_init_bsd2_ring_buffer(dev); + if (ret) + goto cleanup_vebox_ring; + } ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); if (ret) - goto cleanup_vebox_ring; + goto cleanup_ring; maybe cleanup_bsd2_ring? return 0; +cleanup_ring: + intel_cleanup_ring_buffer(dev_priv-ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(dev_priv-ring[VECS]); cleanup_blt_ring: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4865ade..3cab7f9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -42,6 +42,7 @@ static const char *ring_str(int ring) case VCS: return bsd; case BCS: return blt; case VECS: return vebox; + case VCS2: return second bsd; bsd2 would be more concise OK. I will update it. 
default: return ; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8f84555..0b88508 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -760,6 +760,7 @@ enum punit_power_well { #define RENDER_RING_BASE 0x02000 #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 +#define GEN8_BSD2_RING_BASE0x1c000 #define VEBOX_RING_BASE0x1a000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base)((base)+0x30) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index eb3dd26..8b9b89080 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915
[Intel-gfx] [PATCH V4 2/6] drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space
Signed-off-by: Zhao Yakui yakui.z...@intel.com Reviewed-by: Imre Deak imre.d...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 +- drivers/gpu/drm/i915/intel_ringbuffer.h|1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3491402..341ec68 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1035,7 +1035,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args-flags I915_EXEC_IS_PINNED) flags |= I915_DISPATCH_PINNED; - if ((args-flags I915_EXEC_RING_MASK) I915_NUM_RINGS) { + if ((args-flags I915_EXEC_RING_MASK) LAST_USER_RING) { DRM_DEBUG(execbuf with unknown ring: %d\n, (int)(args-flags I915_EXEC_RING_MASK)); return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 413cdc7..ec9d978 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -63,6 +63,7 @@ struct intel_ring_buffer { VECS, } id; #define I915_NUM_RINGS 4 +#define LAST_USER_RING (VECS + 1) u32 mmio_base; void__iomem *virtual_start; struct drm_device *dev; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V4 0/6] drm/i915: Add the support of dual BSD rings on BDW GT3
This patch set adds support for dual BSD rings on BDW GT3. According to the hardware spec, the BDW GT3 has two independent BSD rings, which can be used to process video commands. To keep things simple, this is transparent to the user-space driver/middleware: the kernel driver decides which ring dispatches the BSD video commands. As each BSD ring is powerful enough on its own, it is sufficient to choose the BSD ring based on the drm fd. This way, different BSD rings can be used for video playback and encoding. V1-V2: Follow Daniel's comments and make the following updates: a. consider the stolen check for BDW in kernel/early-quirks.c in patch 01 b. update the comment in Patch 04 c. use the simple ping-pong mechanism to add the support of dual BSD rings. Further optimization will be considered in another patch set. V2-V3: Follow Daniel's comment to use struct_mutex instead of atomic_t when determining which ring is used to dispatch video commands. V3-V4: Follow Imre's comments to adjust the patch order and make some minor updates, for example: add some comments to describe the semaphore in Patch 03 and update the ring name for the second BSD ring.
Zhao Yakui (6): drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3 drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space drm/i915:Initialize the second BSD ring on BDW GT3 machine drm/i915:Handle the irq interrupt for the second BSD ring drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning drm/i915: Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3 drivers/gpu/drm/i915/i915_dma.c|3 + drivers/gpu/drm/i915/i915_drv.c| 26 - drivers/gpu/drm/i915/i915_drv.h|5 ++ drivers/gpu/drm/i915/i915_gem.c|9 ++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 42 +- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_irq.c|5 +- drivers/gpu/drm/i915/i915_reg.h|1 + drivers/gpu/drm/i915/intel_ringbuffer.c| 83 drivers/gpu/drm/i915/intel_ringbuffer.h|5 +- include/drm/i915_pciids.h | 22 ++-- 11 files changed, 190 insertions(+), 12 deletions(-) -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V4 4/6] drm/i915:Handle the irq interrupt for the second BSD ring
Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_irq.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7a4d3ae..63bd5de 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1347,13 +1347,16 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, DRM_ERROR(The master control interrupt lied (GT0)!\n); } - if (master_ctl GEN8_GT_VCS1_IRQ) { + if (master_ctl (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { tmp = I915_READ(GEN8_GT_IIR(1)); if (tmp) { ret = IRQ_HANDLED; vcs = tmp GEN8_VCS1_IRQ_SHIFT; if (vcs GT_RENDER_USER_INTERRUPT) notify_ring(dev, dev_priv-ring[VCS]); + vcs = tmp GEN8_VCS2_IRQ_SHIFT; + if (vcs GT_RENDER_USER_INTERRUPT) + notify_ring(dev, dev_priv-ring[VCS2]); I915_WRITE(GEN8_GT_IIR(1), tmp); } else DRM_ERROR(The master control interrupt lied (GT1)!\n); -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V4 6/6] drm/i915: Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3
The BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middle. Instead the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case it can play back video stream while encoding another video stream. The coarse ping-pong mechanism is used to determine which BSD ring is used to dispatch the BSD video command. V1-V2: Follow Daniel's comment and use the simple ping-pong mechanism. This is only to add the support of dual BSD rings on BDW GT3 machine. The further optimization will be considered in another patch set. V2-V3: Follow Daniel's comment to use the struct_mutext instead of atomic_t during determining which ring can be used to dispatch Video command. Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_dma.c|3 +++ drivers/gpu/drm/i915/i915_drv.h|3 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 40 +++- 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0b38f88..f7558f5 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(dev_priv-backlight_lock); spin_lock_init(dev_priv-uncore.lock); spin_lock_init(dev_priv-mm.object_stat_lock); + dev_priv-ring_index = 0; mutex_init(dev_priv-dpio_lock); mutex_init(dev_priv-modeset_restore_lock); @@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file-driver_priv; + if (file_priv file_priv-bsd_ring) + file_priv-bsd_ring = NULL; kfree(file_priv); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74aef6a..032f992 
100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1472,6 +1472,8 @@ struct drm_i915_private { struct i915_dri1_state dri1; /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; + /* the indicator for dispatch video commands on two BSD rings */ + int ring_index; }; static inline struct drm_i915_private *to_i915(const struct drm_device *dev) @@ -1679,6 +1681,7 @@ struct drm_i915_file_private { struct i915_hw_context *private_default_ctx; atomic_t rps_wait_boost; + struct intel_ring_buffer *bsd_ring; }; /* diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 341ec68..1dc6f03 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -999,6 +999,37 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +/** + * Find one BSD ring to dispatch the corresponding BSD command. + * The Ring ID is returned. + */ +static int gen8_dispatch_bsd_ring(struct drm_device *dev, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct drm_i915_file_private *file_priv = file-driver_priv; + + /* Check whether the file_priv is using one ring */ + if (file_priv-bsd_ring) + return file_priv-bsd_ring-id; + else { + /* If no, use the ping-pong mechanism to select one ring */ + int ring_id; + + mutex_lock(dev-struct_mutex); + if (dev_priv-ring_index == 0) { + ring_id = VCS; + dev_priv-ring_index = 1; + } else { + ring_id = VCS2; + dev_priv-ring_index = 0; + } + file_priv-bsd_ring = dev_priv-ring[ring_id]; + mutex_unlock(dev-struct_mutex); + return ring_id; + } +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -1043,7 +1074,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if ((args-flags I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) ring = dev_priv-ring[RCS]; - else + else if ((args-flags I915_EXEC_RING_MASK) == 
I915_EXEC_BSD) { + if (HAS_BSD2(dev)) { + int ring_id; + ring_id = gen8_dispatch_bsd_ring(dev, file); + ring = dev_priv-ring[ring_id]; + } else + ring = dev_priv-ring[VCS]; + } else ring = dev_priv-ring[(args-flags I915_EXEC_RING_MASK) - 1
[Intel-gfx] [PATCH V4 5/6] drm/i915: Add the VCS2 switch in intel_ring_setup_status_page
Gen7 doesn't have the second BSD ring, but gcc emits a switch check warning during compilation when the VCS2 case is not handled. So just add the VCS2 case to silence the switch check warning. V1->V2: Follow Daniel's comment to update the comment Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.c |5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7e64ab6..1c08dbb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -988,6 +988,11 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) case BCS: mmio = BLT_HWS_PGA_GEN7; break; + /* +* VCS2 actually doesn't exist on Gen7. Only shut up +* gcc switch check warning +*/ + case VCS2: case VCS: mmio = BSD_HWS_PGA_GEN7; break; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V4 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3
Based on the hardware spec, the BDW GT3 has the different configuration with the BDW GT1/GT2. So split the BDW device info definition. This is to do the preparation for adding the Dual BSD rings on BDW GT3 machine. V1-V2: Follow Daniel's comment to pay attention to the stolen check for BDW in kernel/early-quirks.c Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 26 -- include/drm/i915_pciids.h | 22 +- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5d8250f..17fbbe5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -279,6 +279,26 @@ static const struct intel_device_info intel_broadwell_m_info = { GEN_DEFAULT_PIPEOFFSETS, }; +static const struct intel_device_info intel_broadwell_gt3d_info = { + .gen = 8, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + +static const struct intel_device_info intel_broadwell_gt3m_info = { + .gen = 8, .is_mobile = 1, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + /* * Make sure any device matches here are from most specific to most * general. 
For example, since the Quanta match is based on the subsystem @@ -311,8 +331,10 @@ static const struct intel_device_info intel_broadwell_m_info = { INTEL_HSW_M_IDS(intel_haswell_m_info), \ INTEL_VLV_M_IDS(intel_valleyview_m_info), \ INTEL_VLV_D_IDS(intel_valleyview_d_info), \ - INTEL_BDW_M_IDS(intel_broadwell_m_info), \ - INTEL_BDW_D_IDS(intel_broadwell_d_info) + INTEL_BDW_GT12M_IDS(intel_broadwell_m_info), \ + INTEL_BDW_GT12D_IDS(intel_broadwell_d_info), \ + INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \ + INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info) static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 940ece4..24f3cad 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -223,14 +223,26 @@ _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ -#define INTEL_BDW_M_IDS(info) \ +#define INTEL_BDW_GT12M_IDS(info) \ _INTEL_BDW_M_IDS(1, info), \ - _INTEL_BDW_M_IDS(2, info), \ - _INTEL_BDW_M_IDS(3, info) + _INTEL_BDW_M_IDS(2, info) -#define INTEL_BDW_D_IDS(info) \ +#define INTEL_BDW_GT12D_IDS(info) \ _INTEL_BDW_D_IDS(1, info), \ - _INTEL_BDW_D_IDS(2, info), \ + _INTEL_BDW_D_IDS(2, info) + +#define INTEL_BDW_GT3M_IDS(info) \ + _INTEL_BDW_M_IDS(3, info) + +#define INTEL_BDW_GT3D_IDS(info) \ _INTEL_BDW_D_IDS(3, info) +#define INTEL_BDW_M_IDS(info) \ + INTEL_BDW_GT12M_IDS(info), \ + INTEL_BDW_GT3M_IDS(info) + +#define INTEL_BDW_D_IDS(info) \ + INTEL_BDW_GT12D_IDS(info), \ + INTEL_BDW_GT3D_IDS(info) + #endif /* _I915_PCIIDS_H */ -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V4 3/6] drm/i915: Initialize the second BSD ring on BDW GT3 machine
Based on the hardware spec, the BDW GT3 machine has two independent BSD ring that can be used to dispatch the video commands. So just initialize it. V3-V4: Follow Imre's comment to do some minor updates. For example: more comments are added to describe the semaphore between ring. Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c |4 +- drivers/gpu/drm/i915/i915_drv.h |2 + drivers/gpu/drm/i915/i915_gem.c |9 +++- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 78 +++ drivers/gpu/drm/i915/intel_ringbuffer.h |4 +- 7 files changed, 95 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 17fbbe5..2a7842b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -282,7 +282,7 @@ static const struct intel_device_info intel_broadwell_m_info = { static const struct intel_device_info intel_broadwell_gt3d_info = { .gen = 8, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, @@ -292,7 +292,7 @@ static const struct intel_device_info intel_broadwell_gt3d_info = { static const struct intel_device_info intel_broadwell_gt3m_info = { .gen = 8, .is_mobile = 1, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 92c3095..74aef6a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table { #define BSD_RING (1VCS) #define BLT_RING (1BCS) 
#define VEBOX_RING (1VECS) +#define BSD2_RING (1VCS2) #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask BSD_RING) +#define HAS_BSD2(dev) (INTEL_INFO(dev)-ring_mask BSD2_RING) #define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask BLT_RING) #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask VEBOX_RING) #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85c9cf0..65c441c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev) goto cleanup_blt_ring; } + if (HAS_BSD2(dev)) { + ret = intel_init_bsd2_ring_buffer(dev); + if (ret) + goto cleanup_vebox_ring; + } ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); if (ret) - goto cleanup_vebox_ring; + goto cleanup_bsd2_ring; return 0; +cleanup_bsd2_ring: + intel_cleanup_ring_buffer(dev_priv-ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(dev_priv-ring[VECS]); cleanup_blt_ring: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4865ade..282164c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -42,6 +42,7 @@ static const char *ring_str(int ring) case VCS: return bsd; case BCS: return blt; case VECS: return vebox; + case VCS2: return bsd2; default: return ; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8f84555..0b88508 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -760,6 +760,7 @@ enum punit_power_well { #define RENDER_RING_BASE 0x02000 #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 +#define GEN8_BSD2_RING_BASE0x1c000 #define VEBOX_RING_BASE0x1a000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base)((base)+0x30) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index eb3dd26..7e64ab6 100644 --- 
a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1920,14 +1920,22 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring-get_seqno = gen6_ring_get_seqno; ring-set_seqno = ring_set_seqno; ring-sync_to = gen6_ring_sync; + /* +* The current semaphore is only applied on pre
[Intel-gfx] [PATCH V3 6/6] drm/i915: Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3
The BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middle. Instead the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case it can play back video stream while encoding another video stream. The coarse ping-pong mechanism is used to determine which BSD ring is used to dispatch the BSD video command. V1-V2: Follow Daniel's comment and use the simple ping-pong mechanism. This is only to add the support of dual BSD rings on BDW GT3 machine. The further optimization will be considered in another patch set. V2-V3: Follow Daniel's comment to use the struct_mutext instead of atomic_t during determining which ring can be used to dispatch Video command. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_dma.c|3 +++ drivers/gpu/drm/i915/i915_drv.h|3 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 40 +++- 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0b38f88..f7558f5 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(dev_priv-backlight_lock); spin_lock_init(dev_priv-uncore.lock); spin_lock_init(dev_priv-mm.object_stat_lock); + dev_priv-ring_index = 0; mutex_init(dev_priv-dpio_lock); mutex_init(dev_priv-modeset_restore_lock); @@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file-driver_priv; + if (file_priv file_priv-bsd_ring) + file_priv-bsd_ring = NULL; kfree(file_priv); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74aef6a..032f992 100644 --- 
a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1472,6 +1472,8 @@ struct drm_i915_private { struct i915_dri1_state dri1; /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; + /* the indicator for dispatch video commands on two BSD rings */ + int ring_index; }; static inline struct drm_i915_private *to_i915(const struct drm_device *dev) @@ -1679,6 +1681,7 @@ struct drm_i915_file_private { struct i915_hw_context *private_default_ctx; atomic_t rps_wait_boost; + struct intel_ring_buffer *bsd_ring; }; /* diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 341ec68..1dc6f03 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -999,6 +999,37 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +/** + * Find one BSD ring to dispatch the corresponding BSD command. + * The Ring ID is returned. + */ +static int gen8_dispatch_bsd_ring(struct drm_device *dev, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct drm_i915_file_private *file_priv = file-driver_priv; + + /* Check whether the file_priv is using one ring */ + if (file_priv-bsd_ring) + return file_priv-bsd_ring-id; + else { + /* If no, use the ping-pong mechanism to select one ring */ + int ring_id; + + mutex_lock(dev-struct_mutex); + if (dev_priv-ring_index == 0) { + ring_id = VCS; + dev_priv-ring_index = 1; + } else { + ring_id = VCS2; + dev_priv-ring_index = 0; + } + file_priv-bsd_ring = dev_priv-ring[ring_id]; + mutex_unlock(dev-struct_mutex); + return ring_id; + } +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -1043,7 +1074,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if ((args-flags I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) ring = dev_priv-ring[RCS]; - else + else if ((args-flags I915_EXEC_RING_MASK) == I915_EXEC_BSD) 
{ + if (HAS_BSD2(dev)) { + int ring_id; + ring_id = gen8_dispatch_bsd_ring(dev, file); + ring = dev_priv-ring[ring_id]; + } else + ring = dev_priv-ring[VCS]; + } else ring = dev_priv-ring[(args-flags I915_EXEC_RING_MASK) - 1]; if (!intel_ring_initialized(ring)) { -- 1.7.10.1
[Intel-gfx] [PATCH V3 2/6] drm/i915: Initialize the second BSD ring on BDW GT3 machine
Based on the hardware spec, the BDW GT3 machine has two independent BSD ring that can be used to dispatch the video commands. So just initialize it. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c |4 +-- drivers/gpu/drm/i915/i915_drv.h |2 ++ drivers/gpu/drm/i915/i915_gem.c |9 +- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 54 +++ drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++- 7 files changed, 71 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 17fbbe5..2a7842b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -282,7 +282,7 @@ static const struct intel_device_info intel_broadwell_m_info = { static const struct intel_device_info intel_broadwell_gt3d_info = { .gen = 8, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, @@ -292,7 +292,7 @@ static const struct intel_device_info intel_broadwell_gt3d_info = { static const struct intel_device_info intel_broadwell_gt3m_info = { .gen = 8, .is_mobile = 1, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 92c3095..74aef6a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table { #define BSD_RING (1VCS) #define BLT_RING (1BCS) #define VEBOX_RING (1VECS) +#define BSD2_RING (1VCS2) #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask BSD_RING) +#define HAS_BSD2(dev) (INTEL_INFO(dev)-ring_mask BSD2_RING) #define 
HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask BLT_RING) #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask VEBOX_RING) #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85c9cf0..b4dcf2a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev) goto cleanup_blt_ring; } + if (HAS_BSD2(dev)) { + ret = intel_init_bsd2_ring_buffer(dev); + if (ret) + goto cleanup_vebox_ring; + } ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); if (ret) - goto cleanup_vebox_ring; + goto cleanup_ring; return 0; +cleanup_ring: + intel_cleanup_ring_buffer(dev_priv-ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(dev_priv-ring[VECS]); cleanup_blt_ring: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4865ade..3cab7f9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -42,6 +42,7 @@ static const char *ring_str(int ring) case VCS: return bsd; case BCS: return blt; case VECS: return vebox; + case VCS2: return second bsd; default: return ; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8f84555..0b88508 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -760,6 +760,7 @@ enum punit_power_well { #define RENDER_RING_BASE 0x02000 #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 +#define GEN8_BSD2_RING_BASE0x1c000 #define VEBOX_RING_BASE0x1a000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base)((base)+0x30) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index eb3dd26..8b9b89080 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1924,10 +1924,12 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring-semaphore_register[VCS] 
= MI_SEMAPHORE_SYNC_RV; ring-semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB; ring-semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE; + ring-semaphore_register[VCS2] = MI_SEMAPHORE_SYNC_INVALID; ring-signal_mbox[RCS] = GEN6_NOSYNC; ring-signal_mbox[VCS] = GEN6_VRSYNC
[Intel-gfx] [PATCH V3 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3
Based on the hardware spec, the BDW GT3 has the different configuration with the BDW GT1/GT2. So split the BDW device info definition. This is to do the preparation for adding the Dual BSD rings on BDW GT3 machine. V1-V2: Follow Daniel's comment to pay attention to the stolen check for BDW in kernel/early-quirks.c Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 26 -- include/drm/i915_pciids.h | 22 +- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5d8250f..17fbbe5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -279,6 +279,26 @@ static const struct intel_device_info intel_broadwell_m_info = { GEN_DEFAULT_PIPEOFFSETS, }; +static const struct intel_device_info intel_broadwell_gt3d_info = { + .gen = 8, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + +static const struct intel_device_info intel_broadwell_gt3m_info = { + .gen = 8, .is_mobile = 1, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + /* * Make sure any device matches here are from most specific to most * general. 
For example, since the Quanta match is based on the subsystem @@ -311,8 +331,10 @@ static const struct intel_device_info intel_broadwell_m_info = { INTEL_HSW_M_IDS(intel_haswell_m_info), \ INTEL_VLV_M_IDS(intel_valleyview_m_info), \ INTEL_VLV_D_IDS(intel_valleyview_d_info), \ - INTEL_BDW_M_IDS(intel_broadwell_m_info), \ - INTEL_BDW_D_IDS(intel_broadwell_d_info) + INTEL_BDW_GT12M_IDS(intel_broadwell_m_info), \ + INTEL_BDW_GT12D_IDS(intel_broadwell_d_info), \ + INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \ + INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info) static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 940ece4..24f3cad 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -223,14 +223,26 @@ _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ -#define INTEL_BDW_M_IDS(info) \ +#define INTEL_BDW_GT12M_IDS(info) \ _INTEL_BDW_M_IDS(1, info), \ - _INTEL_BDW_M_IDS(2, info), \ - _INTEL_BDW_M_IDS(3, info) + _INTEL_BDW_M_IDS(2, info) -#define INTEL_BDW_D_IDS(info) \ +#define INTEL_BDW_GT12D_IDS(info) \ _INTEL_BDW_D_IDS(1, info), \ - _INTEL_BDW_D_IDS(2, info), \ + _INTEL_BDW_D_IDS(2, info) + +#define INTEL_BDW_GT3M_IDS(info) \ + _INTEL_BDW_M_IDS(3, info) + +#define INTEL_BDW_GT3D_IDS(info) \ _INTEL_BDW_D_IDS(3, info) +#define INTEL_BDW_M_IDS(info) \ + INTEL_BDW_GT12M_IDS(info), \ + INTEL_BDW_GT3M_IDS(info) + +#define INTEL_BDW_D_IDS(info) \ + INTEL_BDW_GT12D_IDS(info), \ + INTEL_BDW_GT3D_IDS(info) + #endif /* _I915_PCIIDS_H */ -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V3 5/6] drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space
One extra ring is added in the kernel driver, but it is transparent to the user-space application/middleware. In this case the number of rings in the kernel driver is larger than the number exported to user-space, so the driver needs to filter out a wrong ring ID passed by user-space. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 +- drivers/gpu/drm/i915/intel_ringbuffer.h|1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3491402..341ec68 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1035,7 +1035,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args-flags I915_EXEC_IS_PINNED) flags |= I915_DISPATCH_PINNED; - if ((args-flags I915_EXEC_RING_MASK) I915_NUM_RINGS) { + if ((args-flags I915_EXEC_RING_MASK) LAST_USER_RING) { DRM_DEBUG(execbuf with unknown ring: %d\n, (int)(args-flags I915_EXEC_RING_MASK)); return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8ca4285..59f4cdd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -64,6 +64,7 @@ struct intel_ring_buffer { VCS2, } id; #define I915_NUM_RINGS 5 +#define LAST_USER_RING (VECS + 1) u32 mmio_base; void__iomem *virtual_start; struct drm_device *dev; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V3 0/6] drm/i915: Add the support of dual BSD rings on BDW GT3
This patch set adds support for dual BSD rings on BDW GT3. Based on the hardware spec, the BDW GT3 has two independent BSD rings, which can be used to process the video commands. To keep things simple, this is transparent to the user-space driver/middleware: the kernel driver decides which ring a BSD video command is dispatched to. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In that case different BSD rings can be used for video playback and encoding. V1->V2: Follow Daniel's comment to do the following update: a. consider the stolen check for BDW in kernel/early-quirks.c in patch 01 b. update the comment in Patch 04 c. use the simple ping-pong mechanism to add the support of dual BSD rings. The further optimization will be considered in another patch set. V2->V3: Follow Daniel's comment to use struct_mutex instead of atomic_t when determining which ring is used to dispatch the video command. Zhao Yakui (6): drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3 drm/i915:Initialize the second BSD ring on BDW GT3 machine drm/i915:Handle the irq interrupt for the second BSD ring drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space drm/i915: Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3 drivers/gpu/drm/i915/i915_dma.c|3 ++ drivers/gpu/drm/i915/i915_drv.c| 26 +++- drivers/gpu/drm/i915/i915_drv.h|5 +++ drivers/gpu/drm/i915/i915_gem.c|9 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 42 +++- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_irq.c|5 ++- drivers/gpu/drm/i915/i915_reg.h|1 + drivers/gpu/drm/i915/intel_ringbuffer.c| 59 drivers/gpu/drm/i915/intel_ringbuffer.h|5 ++- include/drm/i915_pciids.h | 22 --- 11 files changed, 166 insertions(+), 12 deletions(-) -- 
1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V3 4/6] drm/i915: Add the VCS2 switch in intel_ring_setup_status_page for Gen7 to remove the switch check warning
Gen7 doesn't have the second BSD ring, but gcc emits a switch check warning during compilation when the VCS2 case is not handled. So just add the VCS2 case to silence the switch check warning. V1->V2: Follow Daniel's comment to update the comment Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.c |5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8b9b89080..2c89525 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -988,6 +988,11 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) case BCS: mmio = BLT_HWS_PGA_GEN7; break; + /* +* VCS2 actually doesn't exist on Gen7. Only shut up +* gcc switch check warning +*/ + case VCS2: case VCS: mmio = BSD_HWS_PGA_GEN7; break; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V3 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3
Based on the hardware spec, the BDW GT3 has the different configuration with the BDW GT1/GT2. So split the BDW device info definition. This is to do the preparation for adding the Dual BSD rings on BDW GT3 machine. V1-V2: Follow Daniel's comment to pay attention to the stolen check for BDW in kernel/early-quirks.c Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 26 -- include/drm/i915_pciids.h | 22 +- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5d8250f..17fbbe5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -279,6 +279,26 @@ static const struct intel_device_info intel_broadwell_m_info = { GEN_DEFAULT_PIPEOFFSETS, }; +static const struct intel_device_info intel_broadwell_gt3d_info = { + .gen = 8, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + +static const struct intel_device_info intel_broadwell_gt3m_info = { + .gen = 8, .is_mobile = 1, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + /* * Make sure any device matches here are from most specific to most * general. 
For example, since the Quanta match is based on the subsystem @@ -311,8 +331,10 @@ static const struct intel_device_info intel_broadwell_m_info = { INTEL_HSW_M_IDS(intel_haswell_m_info), \ INTEL_VLV_M_IDS(intel_valleyview_m_info), \ INTEL_VLV_D_IDS(intel_valleyview_d_info), \ - INTEL_BDW_M_IDS(intel_broadwell_m_info), \ - INTEL_BDW_D_IDS(intel_broadwell_d_info) + INTEL_BDW_GT12M_IDS(intel_broadwell_m_info), \ + INTEL_BDW_GT12D_IDS(intel_broadwell_d_info), \ + INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \ + INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info) static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 940ece4..24f3cad 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -223,14 +223,26 @@ _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ -#define INTEL_BDW_M_IDS(info) \ +#define INTEL_BDW_GT12M_IDS(info) \ _INTEL_BDW_M_IDS(1, info), \ - _INTEL_BDW_M_IDS(2, info), \ - _INTEL_BDW_M_IDS(3, info) + _INTEL_BDW_M_IDS(2, info) -#define INTEL_BDW_D_IDS(info) \ +#define INTEL_BDW_GT12D_IDS(info) \ _INTEL_BDW_D_IDS(1, info), \ - _INTEL_BDW_D_IDS(2, info), \ + _INTEL_BDW_D_IDS(2, info) + +#define INTEL_BDW_GT3M_IDS(info) \ + _INTEL_BDW_M_IDS(3, info) + +#define INTEL_BDW_GT3D_IDS(info) \ _INTEL_BDW_D_IDS(3, info) +#define INTEL_BDW_M_IDS(info) \ + INTEL_BDW_GT12M_IDS(info), \ + INTEL_BDW_GT3M_IDS(info) + +#define INTEL_BDW_D_IDS(info) \ + INTEL_BDW_GT12D_IDS(info), \ + INTEL_BDW_GT3D_IDS(info) + #endif /* _I915_PCIIDS_H */ -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V3 4/6] drm/i915: Add the VCS2 switch in intel_ring_setup_status_page for Gen7 to remove the switch check warning
Gen7 doesn't have the second BSD ring, but gcc emits a switch check warning during compilation when the VCS2 case is not handled. So just add the VCS2 case to silence the switch check warning. V1->V2: Follow Daniel's comment to update the comment Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.c |5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8b9b89080..2c89525 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -988,6 +988,11 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) case BCS: mmio = BLT_HWS_PGA_GEN7; break; + /* +* VCS2 actually doesn't exist on Gen7. Only shut up +* gcc switch check warning +*/ + case VCS2: case VCS: mmio = BSD_HWS_PGA_GEN7; break; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V3 6/6] drm/i915: Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3
The BDW GT3 has two independent BSD rings, which can be used to process the video commands. To keep things simple, this is transparent to the user-space driver/middleware. Instead the kernel driver will decide which ring is used to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case it can play back one video stream while encoding another video stream. The coarse ping-pong mechanism is used to determine which BSD ring is used to dispatch the BSD video command. V1->V2: Follow Daniel's comment and use the simple ping-pong mechanism. This is only to add the support of dual BSD rings on BDW GT3 machine. The further optimization will be considered in another patch set. V2->V3: Follow Daniel's comment to use the struct_mutex instead of atomic_t when determining which ring can be used to dispatch the video command. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_dma.c|3 +++ drivers/gpu/drm/i915/i915_drv.h|3 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 40 +++- 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0b38f88..f7558f5 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(&dev_priv->backlight_lock); spin_lock_init(&dev_priv->uncore.lock); spin_lock_init(&dev_priv->mm.object_stat_lock); + dev_priv->ring_index = 0; mutex_init(&dev_priv->dpio_lock); mutex_init(&dev_priv->modeset_restore_lock); @@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + if (file_priv && file_priv->bsd_ring) + file_priv->bsd_ring = NULL; kfree(file_priv); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74aef6a..032f992 100644 --- 
a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1472,6 +1472,8 @@ struct drm_i915_private { struct i915_dri1_state dri1; /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; + /* the indicator for dispatch video commands on two BSD rings */ + int ring_index; }; static inline struct drm_i915_private *to_i915(const struct drm_device *dev) @@ -1679,6 +1681,7 @@ struct drm_i915_file_private { struct i915_hw_context *private_default_ctx; atomic_t rps_wait_boost; + struct intel_ring_buffer *bsd_ring; }; /* diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 341ec68..1dc6f03 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -999,6 +999,37 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +/** + * Find one BSD ring to dispatch the corresponding BSD command. + * The Ring ID is returned. + */ +static int gen8_dispatch_bsd_ring(struct drm_device *dev, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct drm_i915_file_private *file_priv = file-driver_priv; + + /* Check whether the file_priv is using one ring */ + if (file_priv-bsd_ring) + return file_priv-bsd_ring-id; + else { + /* If no, use the ping-pong mechanism to select one ring */ + int ring_id; + + mutex_lock(dev-struct_mutex); + if (dev_priv-ring_index == 0) { + ring_id = VCS; + dev_priv-ring_index = 1; + } else { + ring_id = VCS2; + dev_priv-ring_index = 0; + } + file_priv-bsd_ring = dev_priv-ring[ring_id]; + mutex_unlock(dev-struct_mutex); + return ring_id; + } +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -1043,7 +1074,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if ((args-flags I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) ring = dev_priv-ring[RCS]; - else + else if ((args-flags I915_EXEC_RING_MASK) == I915_EXEC_BSD) 
{ + if (HAS_BSD2(dev)) { + int ring_id; + ring_id = gen8_dispatch_bsd_ring(dev, file); + ring = dev_priv-ring[ring_id]; + } else + ring = dev_priv-ring[VCS]; + } else ring = dev_priv-ring[(args-flags I915_EXEC_RING_MASK) - 1]; if (!intel_ring_initialized(ring)) { -- 1.7.10.1
[Intel-gfx] [PATCH V3 5/6] drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space
One extra ring is added in the kernel driver but it is transparent to the user-space application/middleware. In such case the number of rings in the kernel driver is bigger than the number exported to user-space. So the kernel needs to filter out a wrong Ring ID passed by user-space. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 +- drivers/gpu/drm/i915/intel_ringbuffer.h|1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3491402..341ec68 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1035,7 +1035,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_IS_PINNED) flags |= I915_DISPATCH_PINNED; - if ((args->flags & I915_EXEC_RING_MASK) > I915_NUM_RINGS) { + if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) { DRM_DEBUG("execbuf with unknown ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8ca4285..59f4cdd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -64,6 +64,7 @@ struct intel_ring_buffer { VCS2, } id; #define I915_NUM_RINGS 5 +#define LAST_USER_RING (VECS + 1) u32 mmio_base; void __iomem *virtual_start; struct drm_device *dev; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V3 0/6] drm/i915: Add the support of dual BSD rings on BDW GT3
This is the patch set that tries to add the support of dual BSD rings on BDW GT3. Based on hardware spec, the BDW GT3 has two independent BSD rings, which can be used to process the video commands. To keep things simple, this is transparent to the user-space driver/middleware. In such case the kernel driver will decide which ring is used to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case different BSD rings can be used for video playback and encoding. V1->V2: Follow Daniel's comment to do the following update: a. consider the stolen check for BDW in kernel/early-quirks.c in patch 01 b. update the comment in Patch 04 c. use the simple ping-pong mechanism to add the support of dual BSD rings. The further optimization will be considered in another patch set. V2->V3: Follow Daniel's comment to use the struct_mutex instead of atomic_t when determining which ring can be used to dispatch the video command. Zhao Yakui (6): drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3 drm/i915:Initialize the second BSD ring on BDW GT3 machine drm/i915:Handle the irq interrupt for the second BSD ring drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space drm/i915: Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3 drivers/gpu/drm/i915/i915_dma.c|3 ++ drivers/gpu/drm/i915/i915_drv.c| 26 +++- drivers/gpu/drm/i915/i915_drv.h|5 +++ drivers/gpu/drm/i915/i915_gem.c|9 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 42 +++- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_irq.c|5 ++- drivers/gpu/drm/i915/i915_reg.h|1 + drivers/gpu/drm/i915/intel_ringbuffer.c| 59 drivers/gpu/drm/i915/intel_ringbuffer.h|5 ++- include/drm/i915_pciids.h | 22 --- 11 files changed, 166 insertions(+), 12 deletions(-) -- 
1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V3 3/6] drm/i915:Handle the irq interrupt for the second BSD ring
Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_irq.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7a4d3ae..63bd5de 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1347,13 +1347,16 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, DRM_ERROR("The master control interrupt lied (GT0)!\n"); } - if (master_ctl & GEN8_GT_VCS1_IRQ) { + if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { tmp = I915_READ(GEN8_GT_IIR(1)); if (tmp) { ret = IRQ_HANDLED; vcs = tmp >> GEN8_VCS1_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VCS]); + vcs = tmp >> GEN8_VCS2_IRQ_SHIFT; + if (vcs & GT_RENDER_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[VCS2]); I915_WRITE(GEN8_GT_IIR(1), tmp); } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V3 2/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine
Based on the hardware spec, the BDW GT3 machine has two independent BSD ring that can be used to dispatch the video commands. So just initialize it. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c |4 +-- drivers/gpu/drm/i915/i915_drv.h |2 ++ drivers/gpu/drm/i915/i915_gem.c |9 +- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 54 +++ drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++- 7 files changed, 71 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 17fbbe5..2a7842b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -282,7 +282,7 @@ static const struct intel_device_info intel_broadwell_m_info = { static const struct intel_device_info intel_broadwell_gt3d_info = { .gen = 8, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, @@ -292,7 +292,7 @@ static const struct intel_device_info intel_broadwell_gt3d_info = { static const struct intel_device_info intel_broadwell_gt3m_info = { .gen = 8, .is_mobile = 1, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 92c3095..74aef6a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1833,7 +1833,9 @@ struct drm_i915_cmd_table { #define BSD_RING (1VCS) #define BLT_RING (1BCS) #define VEBOX_RING (1VECS) +#define BSD2_RING (1VCS2) #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask BSD_RING) +#define HAS_BSD2(dev) (INTEL_INFO(dev)-ring_mask BSD2_RING) #define 
HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask BLT_RING) #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask VEBOX_RING) #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85c9cf0..b4dcf2a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev) goto cleanup_blt_ring; } + if (HAS_BSD2(dev)) { + ret = intel_init_bsd2_ring_buffer(dev); + if (ret) + goto cleanup_vebox_ring; + } ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); if (ret) - goto cleanup_vebox_ring; + goto cleanup_ring; return 0; +cleanup_ring: + intel_cleanup_ring_buffer(dev_priv-ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(dev_priv-ring[VECS]); cleanup_blt_ring: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4865ade..3cab7f9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -42,6 +42,7 @@ static const char *ring_str(int ring) case VCS: return bsd; case BCS: return blt; case VECS: return vebox; + case VCS2: return second bsd; default: return ; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8f84555..0b88508 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -760,6 +760,7 @@ enum punit_power_well { #define RENDER_RING_BASE 0x02000 #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 +#define GEN8_BSD2_RING_BASE0x1c000 #define VEBOX_RING_BASE0x1a000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base)((base)+0x30) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index eb3dd26..8b9b89080 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1924,10 +1924,12 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring-semaphore_register[VCS] 
= MI_SEMAPHORE_SYNC_RV; ring-semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB; ring-semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE; + ring-semaphore_register[VCS2] = MI_SEMAPHORE_SYNC_INVALID; ring-signal_mbox[RCS] = GEN6_NOSYNC; ring-signal_mbox[VCS] = GEN6_VRSYNC
Re: [Intel-gfx] [PATCH V2 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3
On Mon, 2014-04-14 at 01:09 -0600, Daniel Vetter wrote: On Mon, Apr 14, 2014 at 12:21:39PM +0800, Zhao Yakui wrote: V1-V2: Follow Daniel's comment to consider the stolen check for BDW in kernel/early-quirks.c Small style nit: We usually put the patch changelog at the end of the commit message. That way the core commit message is clearly separated from the per-patch changelog. In rare cases there's some confusion otherwise. No need to resend just for that. Thanks for your advice. I will pay attention to the style nit next time. Thanks. Yakui -Daniel Based on the hardware spec, the BDW GT3 has the different configuration with the BDW GT1/GT2. So split the BDW device info definition. This is to do the preparation for adding the Dual BSD rings on BDW GT3 machine. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 26 -- include/drm/i915_pciids.h | 22 +- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5d8250f..17fbbe5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -279,6 +279,26 @@ static const struct intel_device_info intel_broadwell_m_info = { GEN_DEFAULT_PIPEOFFSETS, }; +static const struct intel_device_info intel_broadwell_gt3d_info = { + .gen = 8, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + +static const struct intel_device_info intel_broadwell_gt3m_info = { + .gen = 8, .is_mobile = 1, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + /* * Make sure any device matches here are from most specific to most * general. 
For example, since the Quanta match is based on the subsystem @@ -311,8 +331,10 @@ static const struct intel_device_info intel_broadwell_m_info = { INTEL_HSW_M_IDS(intel_haswell_m_info), \ INTEL_VLV_M_IDS(intel_valleyview_m_info), \ INTEL_VLV_D_IDS(intel_valleyview_d_info), \ - INTEL_BDW_M_IDS(intel_broadwell_m_info), \ - INTEL_BDW_D_IDS(intel_broadwell_d_info) + INTEL_BDW_GT12M_IDS(intel_broadwell_m_info), \ + INTEL_BDW_GT12D_IDS(intel_broadwell_d_info), \ + INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \ + INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info) static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 940ece4..24f3cad 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -223,14 +223,26 @@ _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ -#define INTEL_BDW_M_IDS(info) \ +#define INTEL_BDW_GT12M_IDS(info) \ _INTEL_BDW_M_IDS(1, info), \ - _INTEL_BDW_M_IDS(2, info), \ - _INTEL_BDW_M_IDS(3, info) + _INTEL_BDW_M_IDS(2, info) -#define INTEL_BDW_D_IDS(info) \ +#define INTEL_BDW_GT12D_IDS(info) \ _INTEL_BDW_D_IDS(1, info), \ - _INTEL_BDW_D_IDS(2, info), \ + _INTEL_BDW_D_IDS(2, info) + +#define INTEL_BDW_GT3M_IDS(info) \ + _INTEL_BDW_M_IDS(3, info) + +#define INTEL_BDW_GT3D_IDS(info) \ _INTEL_BDW_D_IDS(3, info) +#define INTEL_BDW_M_IDS(info) \ + INTEL_BDW_GT12M_IDS(info), \ + INTEL_BDW_GT3M_IDS(info) + +#define INTEL_BDW_D_IDS(info) \ + INTEL_BDW_GT12D_IDS(info), \ + INTEL_BDW_GT3D_IDS(info) + #endif /* _I915_PCIIDS_H */ -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH I-g-t 2/2] tests: Add dummy_reloc test case based on multi drm_fd to test CPU-GPU sync under multi BSD rings
On Mon, 2014-04-14 at 01:06 -0600, Daniel Vetter wrote: On Mon, Apr 14, 2014 at 12:19:58PM +0800, Zhao Yakui wrote: The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the CPU-GPU sync for the second BSD ring. Multi drm_fd can assure that the second BSD ring has the opportunity to dispatch the GPU command. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/Makefile.sources|1 + tests/gem_dummy_reloc_multi_bsd.c | 258 + I've meant that you add a new subtest to the existing gem_dummy_reloc test. With your patch here we essentially duplicate all the tests for the other rings. 2 files changed, 259 insertions(+) create mode 100644 tests/gem_dummy_reloc_multi_bsd.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 254a5c5..98f277f 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -105,6 +105,7 @@ TESTS_progs = \ gem_ring_sync_copy \ gem_ring_sync_loop \ gem_multi_bsd_sync_loop \ + gem_dummy_reloc_multi_bsd \ Tests with subtests must be added to the TESTS_progs_M variable, otherwise piglit won't be able to enumerate the subtests. That's just an fyi for the next testcase, like I've said here it's imo better to just add a new subtest. Thanks for the rules about how to add the test with subtests.(Sorry that I don't know this rule) OK. I will follow your comment to add it as subtests. Thanks. Yakui Also you've forgotten to update .gitignore, when building with your patch git status shows some not-added binaries. 
-Daniel gem_seqno_wrap \ gem_set_tiling_vs_gtt \ gem_set_tiling_vs_pwrite \ diff --git a/tests/gem_dummy_reloc_multi_bsd.c b/tests/gem_dummy_reloc_multi_bsd.c new file mode 100644 index 000..ef8213e --- /dev/null +++ b/tests/gem_dummy_reloc_multi_bsd.c @@ -0,0 +1,258 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_dummy_reloc_loop*.c) + *Zhao Yakui yakui.z...@intel.com + * + */ + +#include stdlib.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/time.h +#include drm.h +#include ioctl_wrappers.h +#include drmtest.h +#include intel_bufmgr.h +#include intel_batchbuffer.h +#include intel_io.h +#include i830_reg.h +#include intel_chipset.h + +#define LOCAL_I915_EXEC_VEBOX (40) + +static drm_intel_bufmgr *bufmgr; +struct intel_batchbuffer *batch; +static drm_intel_bo *target_buffer; + +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + + +/* + * Testcase: Basic check of ring-cpu sync using a dummy reloc under multi-fd + * + * The last test (that randomly switches the ring) seems to be pretty effective + * at hitting the missed irq bug that's worked around with the HWSTAM irq write. + */ + + +#define MI_COND_BATCH_BUFFER_END (0x3623 | 1) +#define MI_DO_COMPARE (121) +static void +dummy_reloc_loop(int ring) +{ + int i; + srandom(0xdeadbeef); + + for (i = 0; i 0x10; i++) { + int mindex = random() % NUM_FD; + + batch = mbatch[mindex]; + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword
Re: [Intel-gfx] [PATCH I-g-t 2/2] tests: Add dummy_reloc test case based on multi drm_fd to test CPU-GPU sync under multi BSD rings
On Mon, 2014-04-14 at 01:06 -0600, Daniel Vetter wrote: On Mon, Apr 14, 2014 at 12:19:58PM +0800, Zhao Yakui wrote: The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the CPU-GPU sync for the second BSD ring. Multi drm_fd can assure that the second BSD ring has the opportunity to dispatch the GPU command. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/Makefile.sources|1 + tests/gem_dummy_reloc_multi_bsd.c | 258 + I've meant that you add a new subtest to the existing gem_dummy_reloc test. With your patch here we essentially duplicate all the tests for the other rings. 2 files changed, 259 insertions(+) create mode 100644 tests/gem_dummy_reloc_multi_bsd.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 254a5c5..98f277f 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -105,6 +105,7 @@ TESTS_progs = \ gem_ring_sync_copy \ gem_ring_sync_loop \ gem_multi_bsd_sync_loop \ + gem_dummy_reloc_multi_bsd \ Tests with subtests must be added to the TESTS_progs_M variable, otherwise piglit won't be able to enumerate the subtests. That's just an fyi for the next testcase, like I've said here it's imo better to just add a new subtest. Thanks for the rules about how to add the test with subtests.(Sorry that I don't know this rule) OK. I will follow your comment to add it as subtests. Also you've forgotten to update .gitignore, when building with your patch git status shows some not-added binaries. BTW: How do I update the .gitigonre? In my test I usually use the following step to create the corresponding patches before sending and never update the .gitignore. a. use quilt tool to create it b. use git am to apply the corresponding patch on the working tree c. use git format-patch to get the corresponding patches that can be sent by using git-send-email Appreciate your helps. Thanks. 
Yakui -Daniel gem_seqno_wrap \ gem_set_tiling_vs_gtt \ gem_set_tiling_vs_pwrite \ diff --git a/tests/gem_dummy_reloc_multi_bsd.c b/tests/gem_dummy_reloc_multi_bsd.c new file mode 100644 index 000..ef8213e --- /dev/null +++ b/tests/gem_dummy_reloc_multi_bsd.c @@ -0,0 +1,258 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_dummy_reloc_loop*.c) + *Zhao Yakui yakui.z...@intel.com + * + */ + +#include stdlib.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/time.h +#include drm.h +#include ioctl_wrappers.h +#include drmtest.h +#include intel_bufmgr.h +#include intel_batchbuffer.h +#include intel_io.h +#include i830_reg.h +#include intel_chipset.h + +#define LOCAL_I915_EXEC_VEBOX (40) + +static drm_intel_bufmgr *bufmgr; +struct intel_batchbuffer *batch; +static drm_intel_bo *target_buffer; + +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + + +/* + * Testcase: Basic check of ring-cpu sync using a dummy reloc under multi-fd + * + * The last test (that randomly switches the ring) seems to be pretty effective + * at hitting the missed irq bug that's worked around with the HWSTAM irq write. + */ + + +#define MI_COND_BATCH_BUFFER_END (0x3623 | 1) +#define MI_DO_COMPARE (121) +static void +dummy_reloc_loop(int ring
Re: [Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3
On Mon, 2014-04-14 at 01:22 -0600, Daniel Vetter wrote: On Mon, Apr 14, 2014 at 12:21:44PM +0800, Zhao Yakui wrote: V1-V2: Follow Daniel's comment and use the simple ping-pong mechanism. This is only to add the support of dual BSD rings on BDW GT3 machine. The further optimization will be considered in another patch set. The BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middle. Instead the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case it can play back video stream while encoding another video stream. The coarse ping-pong mechanism is used to determine which BSD ring is used to dispatch the BSD video command. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_dma.c|3 +++ drivers/gpu/drm/i915/i915_drv.h|3 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 37 +++- 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0b38f88..4d27cf4 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(dev_priv-backlight_lock); spin_lock_init(dev_priv-uncore.lock); spin_lock_init(dev_priv-mm.object_stat_lock); + atomic_set(dev_priv-bsd_cmd_counter, 0); mutex_init(dev_priv-dpio_lock); mutex_init(dev_priv-modeset_restore_lock); @@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file-driver_priv; + if (file_priv file_priv-bsd_ring) + file_priv-bsd_ring = NULL; kfree(file_priv); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ac5598c3..68e8166 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ 
b/drivers/gpu/drm/i915/i915_drv.h @@ -1466,6 +1466,8 @@ struct drm_i915_private { struct i915_dri1_state dri1; /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; + /* the lock for dispatch video commands on two BSD rings */ + atomic_t bsd_cmd_counter; You're still using atomic_t for no real good reason. gen8_dispatch_bsd_ring is always called with the dev-struct_mutex lock held, so there's really no reason for it. If the struct_mutex is used in the gen8_dispatch_bsd_ring, I can remove the atomic_t. It seems that the struct_mutex is a big lock and it is used very frequently(i915_gem.c, i915_dma.c and so on). In my point it is a little heavier than the atomic_t if one counter is increased and returned. If you think that the mutex is better than atomic, I will follow your advice. Thanks. Yakui -Daniel }; static inline struct drm_i915_private *to_i915(const struct drm_device *dev) @@ -1673,6 +1675,7 @@ struct drm_i915_file_private { struct i915_hw_context *private_default_ctx; atomic_t rps_wait_boost; + struct intel_ring_buffer *bsd_ring; }; /* diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 341ec68..720ef17 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -999,6 +999,34 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +/** + * Find one BSD ring to dispatch the corresponding BSD command. + * The Ring ID is returned. 
+ */ +static int gen8_dispatch_bsd_ring(struct drm_device *dev, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct drm_i915_file_private *file_priv = file-driver_priv; + + /* Check whether the file_priv is using one ring */ + if (file_priv-bsd_ring) + return file_priv-bsd_ring-id; + else { + /* If no, use the ping-pong mechanism to select one ring */ + int counter, ring_id; + smp_mb__before_atomic_inc(); + counter = atomic_inc_return(dev_priv-bsd_cmd_counter); + if (counter % 2 == 0) + ring_id = VCS; + else + ring_id = VCS2; + + file_priv-bsd_ring = dev_priv-ring[ring_id]; + return ring_id; + } +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -1043,7 +1071,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if ((args-flags I915_EXEC_RING_MASK
Re: [Intel-gfx] [PATCH I-g-t 2/2] tests: Add dummy_reloc test case based on multi drm_fd to test CPU-GPU sync under multi BSD rings
On Mon, 2014-04-14 at 01:55 -0600, Daniel Vetter wrote: On Mon, Apr 14, 2014 at 9:32 AM, Zhao Yakui yakui.z...@intel.com wrote: BTW: How do I update the .gitignore? In my test I usually use the following steps to create the corresponding patches before sending and never update the .gitignore. a. use quilt tool to create it b. use git am to apply the corresponding patch on the working tree c. use git format-patch to get the corresponding patches that can be sent by using git-send-email It's a normal file in the corresponding directory. You can just edit it and add it to the patch. Thanks. Yakui -Daniel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3
On Mon, 2014-04-14 at 02:19 -0600, Chris Wilson wrote: On Mon, Apr 14, 2014 at 04:05:19PM +0800, Zhao Yakui wrote: On Mon, 2014-04-14 at 01:22 -0600, Daniel Vetter wrote: You're still using atomic_t for no real good reason. gen8_dispatch_bsd_ring is always called with the dev->struct_mutex lock held, so there's really no reason for it. If the struct_mutex is used in the gen8_dispatch_bsd_ring, I can remove the atomic_t. It seems that the struct_mutex is a big lock and it is used very frequently (i915_gem.c, i915_dma.c and so on). In my opinion it is a little heavier than the atomic_t if one counter is increased and returned. If you think that the mutex is better than atomic, I will follow your advice. You are already holding the struct_mutex whenever we touch the ring and execbuffer. Even in a fine-grained world, there will still be a mutex around all operations that touch the rings. Hi, Chris I understand your concern. From the source code the struct_mutex will be held when trying to do the buffer relocation and dispatch the command in one ring. But my code is only to select one BSD ring. In such a case the atomic_t usage is enough and it is unnecessary to hold the struct_mutex. If you also think that the struct_mutex is better, I can update the code to use the struct_mutex. Thanks. Yakui -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH I-g-t V2 2/2] tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings
The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the CPU-GPU sync for the second BSD ring. V1-V2: Follow Daniel's comment to add one subtext instead of one individual test case, which is used to test the CPU-GPU sync under multi BSD rings Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/gem_dummy_reloc_loop.c | 102 +- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c index a61b59b..660d8e1 100644 --- a/tests/gem_dummy_reloc_loop.c +++ b/tests/gem_dummy_reloc_loop.c @@ -48,6 +48,13 @@ static drm_intel_bufmgr *bufmgr; struct intel_batchbuffer *batch; static drm_intel_bo *target_buffer; +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + /* * Testcase: Basic check of ring-cpu sync using a dummy reloc * @@ -124,6 +131,50 @@ dummy_reloc_loop_random_ring(int num_rings) } } +static void +dummy_reloc_loop_random_ring_multi_fd(int num_rings) +{ + int i; + struct intel_batchbuffer *saved_batch; + + saved_batch = batch; + + srandom(0xdeadbeef); + + for (i = 0; i 0x10; i++) { + int mindex; + int ring = random() % num_rings + 1; + + mindex = random() % NUM_FD; + batch = mbatch[mindex]; + + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_BATCH(0); /* reserved */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP | (122) | (0xf)); + ADVANCE_BATCH(); + } + intel_batchbuffer_flush_on_ring(batch, ring); + + drm_intel_bo_map(target_buffer, 0); + // 
map to force waiting on rendering + drm_intel_bo_unmap(target_buffer); + } + + batch = saved_batch; +} + int fd; int devid; int num_rings; @@ -133,6 +184,7 @@ igt_main igt_skip_on_simulation(); igt_fixture { + int i; fd = drm_open_any(); devid = intel_get_drm_devid(fd); num_rings = gem_get_num_rings(fd); @@ -148,6 +200,35 @@ igt_main target_buffer = drm_intel_bo_alloc(bufmgr, target bo, 4096, 4096); igt_assert(target_buffer); + + /* Create multi drm_fd and map one gem object to multi gem_contexts */ + { + unsigned int target_flink; + char buffer_name[32]; + if (dri_bo_flink(target_buffer, target_flink)) { + printf(fail to get flink for target buffer\n); + igt_assert(0); + } + for (i = 0; i NUM_FD; i++) { + mfd[i] = 0; + mbufmgr[i] = NULL; + mbuffer[i] = NULL; + } + for (i = 0; i NUM_FD; i++) { + sprintf(buffer_name, Target buffer %d\n, i); + mfd[i] = drm_open_any(); + mbufmgr[i] = drm_intel_bufmgr_gem_init(mfd[i], 4096); + igt_assert(mbufmgr[i]); + drm_intel_bufmgr_gem_enable_reuse(mbufmgr[i]); + mbatch[i] = intel_batchbuffer_alloc(mbufmgr[i], devid); + igt_assert(mbufmgr[i]); + mbuffer[i] = intel_bo_gem_create_from_name( + mbufmgr[i], + buffer_name, + target_flink); + igt_assert(mbuffer[i]); + } + } } igt_subtest(render) { @@ -190,8 +271,27 @@ igt_main printf(dummy loop run on random rings completed\n); } } - + igt_subtest(mixed_multi_fd
[Intel-gfx] [PATCH I-g-t V2 0/2] Tests: Add test cases based on multi drm_fd to test sync
This follows Daniel's advice to add the two test cases based on multi drm_fd to test the ring sync and CPU-GPU sync. The Broadwell GT3 machine has two independent BSD rings that can be used to process the video commands. This is implemented in kernel driver and transparent to the user-space. But we still need to check the ring sync and CPU-GPU sync for the second BSD ring. Two tests are created based on the multi drm_fds to test the sync. Multi drm_fd can assure that the second BSD ring has the opportunity to dispatch the GPU command. V1->V2: Follow Daniel's comment to add one subtest instead of one individual test case, which is used to test the CPU-GPU sync under multi BSD rings. Zhao Yakui (2): tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings tests/gem_dummy_reloc_loop: Add one subtest based on multi drm_fd to test CPU-GPU sync under multi BSD rings tests/Makefile.sources |1 + tests/gem_dummy_reloc_loop.c| 102 ++- tests/gem_multi_bsd_sync_loop.c | 172 +++ 3 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 tests/gem_multi_bsd_sync_loop.c -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH I-g-t V2 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync under multi BSD rings
The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the ring sync between the two BSD rings. At the same time it also needs to check the sync among the second BSD ring and the other rings. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/Makefile.sources |1 + tests/gem_multi_bsd_sync_loop.c | 172 +++ 2 files changed, 173 insertions(+) create mode 100644 tests/gem_multi_bsd_sync_loop.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index c957ace..7cd9ca8 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -105,6 +105,7 @@ TESTS_progs = \ gem_render_tiled_blits \ gem_ring_sync_copy \ gem_ring_sync_loop \ + gem_multi_bsd_sync_loop \ gem_seqno_wrap \ gem_set_tiling_vs_gtt \ gem_set_tiling_vs_pwrite \ diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c new file mode 100644 index 000..7f5b832 --- /dev/null +++ b/tests/gem_multi_bsd_sync_loop.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_ring_sync_loop_*.c) + *Zhao Yakui yakui.z...@intel.com + * + */ + +#include stdlib.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/time.h +#include drm.h +#include ioctl_wrappers.h +#include drmtest.h +#include intel_bufmgr.h +#include intel_batchbuffer.h +#include intel_io.h +#include i830_reg.h +#include intel_chipset.h + +static drm_intel_bufmgr *bufmgr; +struct intel_batchbuffer *batch; +static drm_intel_bo *target_buffer; + +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + + +/* + * Testcase: Basic check of ring-ring sync using a dummy reloc + * + * Extremely efficient at catching missed irqs with semaphores=0 ... 
+ */ + +#define MI_COND_BATCH_BUFFER_END (0x3623 | 1) +#define MI_DO_COMPARE (121) + +static void +store_dword_loop(int fd) +{ + int i; + int num_rings = gem_get_num_rings(fd); + + srandom(0xdeadbeef); + + for (i = 0; i SLOW_QUICK(0x10, 10); i++) { + int ring, mindex; + ring = random() % num_rings + 1; + mindex = random() % NUM_FD; + batch = mbatch[mindex]; + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_BATCH(0); /* reserved */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP | (122) | (0xf)); + ADVANCE_BATCH(); + } + intel_batchbuffer_flush_on_ring(batch, ring); + } + + drm_intel_bo_map(target_buffer, 0); + // map to force waiting on rendering + drm_intel_bo_unmap(target_buffer); +} + +igt_simple_main +{ + int fd; + int devid; + int i; + + fd = drm_open_any(); + devid = intel_get_drm_devid(fd); + gem_require_ring(fd, I915_EXEC_BLT
Re: [Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3
On Fri, 2014-04-11 at 02:57 -0600, Daniel Vetter wrote: On Fri, Apr 11, 2014 at 08:56:28AM +0800, Zhao Yakui wrote: On Thu, 2014-04-10 at 03:04 -0600, Daniel Vetter wrote: On Thu, Apr 10, 2014 at 04:28:34PM +0800, Zhao Yakui wrote: BTW: Does it need to check all the flags defined in i915_drm.h or the exported flag returned by i915_get_parameter? I don't have i915_get_parameter anywhere in my sources, so no idea what you mean ... Sorry that the function should be i915_getparam. It is called by the I915_GETPARAM ioctl to query the flag supported by the driver. Ah, now I understand. The idea is to test all fields of the structure exhaustively (so also rsvd to make sure it's 0). Well except for the buffer count field since we have tests for that already. For the reasons see my two blog posts on the topic: http://blog.ffwll.ch/2013/11/testing-requirements-for-drmi915.html http://blog.ffwll.ch/2013/11/botching-up-ioctls.html OK. It seems that the case needs to check more fields than the exported flag. I will take a look at your blog and understand how to write the test case. Thanks. Yakui Cheers, Daniel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH I-g-t 2/2] tests: Add dummy_reloc test case based on multi drm_fd to test CPU-GPU sync under multi BSD rings
The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the CPU-GPU sync for the second BSD ring. Multi drm_fd can assure that the second BSD ring has the opportunity to dispatch the GPU command. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/Makefile.sources|1 + tests/gem_dummy_reloc_multi_bsd.c | 258 + 2 files changed, 259 insertions(+) create mode 100644 tests/gem_dummy_reloc_multi_bsd.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 254a5c5..98f277f 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -105,6 +105,7 @@ TESTS_progs = \ gem_ring_sync_copy \ gem_ring_sync_loop \ gem_multi_bsd_sync_loop \ + gem_dummy_reloc_multi_bsd \ gem_seqno_wrap \ gem_set_tiling_vs_gtt \ gem_set_tiling_vs_pwrite \ diff --git a/tests/gem_dummy_reloc_multi_bsd.c b/tests/gem_dummy_reloc_multi_bsd.c new file mode 100644 index 000..ef8213e --- /dev/null +++ b/tests/gem_dummy_reloc_multi_bsd.c @@ -0,0 +1,258 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_dummy_reloc_loop*.c) + *Zhao Yakui yakui.z...@intel.com + * + */ + +#include stdlib.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/time.h +#include drm.h +#include ioctl_wrappers.h +#include drmtest.h +#include intel_bufmgr.h +#include intel_batchbuffer.h +#include intel_io.h +#include i830_reg.h +#include intel_chipset.h + +#define LOCAL_I915_EXEC_VEBOX (40) + +static drm_intel_bufmgr *bufmgr; +struct intel_batchbuffer *batch; +static drm_intel_bo *target_buffer; + +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + + +/* + * Testcase: Basic check of ring-cpu sync using a dummy reloc under multi-fd + * + * The last test (that randomly switches the ring) seems to be pretty effective + * at hitting the missed irq bug that's worked around with the HWSTAM irq write. 
+ */ + + +#define MI_COND_BATCH_BUFFER_END (0x3623 | 1) +#define MI_DO_COMPARE (121) +static void +dummy_reloc_loop(int ring) +{ + int i; + srandom(0xdeadbeef); + + for (i = 0; i 0x10; i++) { + int mindex = random() % NUM_FD; + + batch = mbatch[mindex]; + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(0); /* reserved */ + OUT_BATCH(MI_NOOP | (122) | (0xf)); + ADVANCE_BATCH(); + } + intel_batchbuffer_flush_on_ring(batch, ring); + + drm_intel_bo_map(target_buffer, 0); + // map to force completion + drm_intel_bo_unmap(target_buffer); + } +} + +static void +dummy_reloc_loop_random_ring(int num_rings) +{ + int i; + + srandom(0xdeadbeef); + + for (i = 0; i 0x10; i
[Intel-gfx] [PATCH I-g-t 1/2] tests: Add one ring sync case based on multi drm_fd to test ring semaphore sync
The Broadwell GT3 machine has two independent BSD rings in kernel driver while it is transparent to the user-space driver. In such case it needs to check the ring sync between the two BSD rings. At the same time it also needs to check the sync among the second BSD ring and the other rings. Multi drm_fd can assure that the second BSD ring has the opportunity to dispatch the GPU command. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- tests/Makefile.sources |1 + tests/gem_multi_bsd_sync_loop.c | 172 +++ 2 files changed, 173 insertions(+) create mode 100644 tests/gem_multi_bsd_sync_loop.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index bf02a48..254a5c5 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -104,6 +104,7 @@ TESTS_progs = \ gem_render_tiled_blits \ gem_ring_sync_copy \ gem_ring_sync_loop \ + gem_multi_bsd_sync_loop \ gem_seqno_wrap \ gem_set_tiling_vs_gtt \ gem_set_tiling_vs_pwrite \ diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c new file mode 100644 index 000..7f5b832 --- /dev/null +++ b/tests/gem_multi_bsd_sync_loop.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Daniel Vetter daniel.vet...@ffwll.ch (based on gem_ring_sync_loop_*.c) + *Zhao Yakui yakui.z...@intel.com + * + */ + +#include stdlib.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/time.h +#include drm.h +#include ioctl_wrappers.h +#include drmtest.h +#include intel_bufmgr.h +#include intel_batchbuffer.h +#include intel_io.h +#include i830_reg.h +#include intel_chipset.h + +static drm_intel_bufmgr *bufmgr; +struct intel_batchbuffer *batch; +static drm_intel_bo *target_buffer; + +#define NUM_FD 50 + +static int mfd[NUM_FD]; +static drm_intel_bufmgr *mbufmgr[NUM_FD]; +static struct intel_batchbuffer *mbatch[NUM_FD]; +static drm_intel_bo *mbuffer[NUM_FD]; + + +/* + * Testcase: Basic check of ring-ring sync using a dummy reloc + * + * Extremely efficient at catching missed irqs with semaphores=0 ... 
+ */ + +#define MI_COND_BATCH_BUFFER_END (0x3623 | 1) +#define MI_DO_COMPARE (121) + +static void +store_dword_loop(int fd) +{ + int i; + int num_rings = gem_get_num_rings(fd); + + srandom(0xdeadbeef); + + for (i = 0; i SLOW_QUICK(0x10, 10); i++) { + int ring, mindex; + ring = random() % num_rings + 1; + mindex = random() % NUM_FD; + batch = mbatch[mindex]; + if (ring == I915_EXEC_RENDER) { + BEGIN_BATCH(4); + OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE); + OUT_BATCH(0x); /* compare dword */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(MI_FLUSH_DW | 1); + OUT_BATCH(0); /* reserved */ + OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(MI_NOOP | (122) | (0xf)); + ADVANCE_BATCH(); + } + intel_batchbuffer_flush_on_ring(batch, ring); + } + + drm_intel_bo_map(target_buffer, 0); + // map to force waiting on rendering + drm_intel_bo_unmap(target_buffer); +} + +igt_simple_main +{ + int fd; + int devid; + int i; + + fd
[Intel-gfx] [PATCH V2 3/6] drm/i915:Handle the irq interrupt for the second BSD ring
Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_irq.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7a4d3ae..63bd5de 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1347,13 +1347,16 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, DRM_ERROR(The master control interrupt lied (GT0)!\n); } - if (master_ctl GEN8_GT_VCS1_IRQ) { + if (master_ctl (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { tmp = I915_READ(GEN8_GT_IIR(1)); if (tmp) { ret = IRQ_HANDLED; vcs = tmp GEN8_VCS1_IRQ_SHIFT; if (vcs GT_RENDER_USER_INTERRUPT) notify_ring(dev, dev_priv-ring[VCS]); + vcs = tmp GEN8_VCS2_IRQ_SHIFT; + if (vcs GT_RENDER_USER_INTERRUPT) + notify_ring(dev, dev_priv-ring[VCS2]); I915_WRITE(GEN8_GT_IIR(1), tmp); } else DRM_ERROR(The master control interrupt lied (GT1)!\n); -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V2 2/6] drm/i915:Initialize the second BSD ring on BDW GT3 machine
Based on the hardware spec, the BDW GT3 machine has two independent BSD ring that can be used to dispatch the video commands. So just initialize it. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c |4 +-- drivers/gpu/drm/i915/i915_drv.h |2 ++ drivers/gpu/drm/i915/i915_gem.c |9 +- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 54 +++ drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++- 7 files changed, 71 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 17fbbe5..2a7842b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -282,7 +282,7 @@ static const struct intel_device_info intel_broadwell_m_info = { static const struct intel_device_info intel_broadwell_gt3d_info = { .gen = 8, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, @@ -292,7 +292,7 @@ static const struct intel_device_info intel_broadwell_gt3d_info = { static const struct intel_device_info intel_broadwell_gt3m_info = { .gen = 8, .is_mobile = 1, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 761fc53..ac5598c3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1827,7 +1827,9 @@ struct drm_i915_cmd_table { #define BSD_RING (1VCS) #define BLT_RING (1BCS) #define VEBOX_RING (1VECS) +#define BSD2_RING (1VCS2) #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask BSD_RING) +#define HAS_BSD2(dev) (INTEL_INFO(dev)-ring_mask BSD2_RING) 
#define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask BLT_RING) #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask VEBOX_RING) #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85c9cf0..b4dcf2a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4374,13 +4374,20 @@ static int i915_gem_init_rings(struct drm_device *dev) goto cleanup_blt_ring; } + if (HAS_BSD2(dev)) { + ret = intel_init_bsd2_ring_buffer(dev); + if (ret) + goto cleanup_vebox_ring; + } ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); if (ret) - goto cleanup_vebox_ring; + goto cleanup_ring; return 0; +cleanup_ring: + intel_cleanup_ring_buffer(dev_priv-ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(dev_priv-ring[VECS]); cleanup_blt_ring: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4865ade..3cab7f9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -42,6 +42,7 @@ static const char *ring_str(int ring) case VCS: return bsd; case BCS: return blt; case VECS: return vebox; + case VCS2: return second bsd; default: return ; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8f84555..0b88508 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -760,6 +760,7 @@ enum punit_power_well { #define RENDER_RING_BASE 0x02000 #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 +#define GEN8_BSD2_RING_BASE0x1c000 #define VEBOX_RING_BASE0x1a000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base)((base)+0x30) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index eb3dd26..8b9b89080 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1924,10 +1924,12 @@ int intel_init_render_ring_buffer(struct drm_device *dev) 
ring-semaphore_register[VCS] = MI_SEMAPHORE_SYNC_RV; ring-semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB; ring-semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE; + ring-semaphore_register[VCS2] = MI_SEMAPHORE_SYNC_INVALID; ring-signal_mbox[RCS] = GEN6_NOSYNC; ring-signal_mbox[VCS] = GEN6_VRSYNC
[Intel-gfx] [PATCH V2 4/6] drm/i915: Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning
V1-V2: Follow Daniel's comment to update the comment The Gen7 doesn't have the second BSD ring. But it will complain the switch check warning message during compilation. So just add it to remove the switch check warning. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.c |5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8b9b89080..2c89525 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -988,6 +988,11 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) case BCS: mmio = BLT_HWS_PGA_GEN7; break; + /* +* VCS2 actually doesn't exist on Gen7. Only shut up +* gcc switch check warning +*/ + case VCS2: case VCS: mmio = BSD_HWS_PGA_GEN7; break; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V2 1/6] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3
V1-V2: Follow Daniel's comment to consider the stolen check for BDW in kernel/early-quirks.c Based on the hardware spec, the BDW GT3 has the different configuration with the BDW GT1/GT2. So split the BDW device info definition. This is to do the preparation for adding the Dual BSD rings on BDW GT3 machine. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 26 -- include/drm/i915_pciids.h | 22 +- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5d8250f..17fbbe5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -279,6 +279,26 @@ static const struct intel_device_info intel_broadwell_m_info = { GEN_DEFAULT_PIPEOFFSETS, }; +static const struct intel_device_info intel_broadwell_gt3d_info = { + .gen = 8, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + +static const struct intel_device_info intel_broadwell_gt3m_info = { + .gen = 8, .is_mobile = 1, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + /* * Make sure any device matches here are from most specific to most * general. 
For example, since the Quanta match is based on the subsystem @@ -311,8 +331,10 @@ static const struct intel_device_info intel_broadwell_m_info = { INTEL_HSW_M_IDS(intel_haswell_m_info), \ INTEL_VLV_M_IDS(intel_valleyview_m_info), \ INTEL_VLV_D_IDS(intel_valleyview_d_info), \ - INTEL_BDW_M_IDS(intel_broadwell_m_info), \ - INTEL_BDW_D_IDS(intel_broadwell_d_info) + INTEL_BDW_GT12M_IDS(intel_broadwell_m_info), \ + INTEL_BDW_GT12D_IDS(intel_broadwell_d_info), \ + INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \ + INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info) static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 940ece4..24f3cad 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -223,14 +223,26 @@ _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ -#define INTEL_BDW_M_IDS(info) \ +#define INTEL_BDW_GT12M_IDS(info) \ _INTEL_BDW_M_IDS(1, info), \ - _INTEL_BDW_M_IDS(2, info), \ - _INTEL_BDW_M_IDS(3, info) + _INTEL_BDW_M_IDS(2, info) -#define INTEL_BDW_D_IDS(info) \ +#define INTEL_BDW_GT12D_IDS(info) \ _INTEL_BDW_D_IDS(1, info), \ - _INTEL_BDW_D_IDS(2, info), \ + _INTEL_BDW_D_IDS(2, info) + +#define INTEL_BDW_GT3M_IDS(info) \ + _INTEL_BDW_M_IDS(3, info) + +#define INTEL_BDW_GT3D_IDS(info) \ _INTEL_BDW_D_IDS(3, info) +#define INTEL_BDW_M_IDS(info) \ + INTEL_BDW_GT12M_IDS(info), \ + INTEL_BDW_GT3M_IDS(info) + +#define INTEL_BDW_D_IDS(info) \ + INTEL_BDW_GT12D_IDS(info), \ + INTEL_BDW_GT3D_IDS(info) + #endif /* _I915_PCIIDS_H */ -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3
V1-V2: Follow Daniel's comment and use the simple ping-pong mechanism. This is only to add the support of dual BSD rings on BDW GT3 machine. The further optimization will be considered in another patch set. The BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middle. Instead the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case it can play back video stream while encoding another video stream. The coarse ping-pong mechanism is used to determine which BSD ring is used to dispatch the BSD video command. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_dma.c|3 +++ drivers/gpu/drm/i915/i915_drv.h|3 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 37 +++- 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0b38f88..4d27cf4 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(dev_priv-backlight_lock); spin_lock_init(dev_priv-uncore.lock); spin_lock_init(dev_priv-mm.object_stat_lock); + atomic_set(dev_priv-bsd_cmd_counter, 0); mutex_init(dev_priv-dpio_lock); mutex_init(dev_priv-modeset_restore_lock); @@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file-driver_priv; + if (file_priv file_priv-bsd_ring) + file_priv-bsd_ring = NULL; kfree(file_priv); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ac5598c3..68e8166 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1466,6 +1466,8 @@ struct drm_i915_private { struct i915_dri1_state dri1; /* Old ums 
support infrastructure, same warning applies. */ struct i915_ums_state ums; + /* the lock for dispatch video commands on two BSD rings */ + atomic_t bsd_cmd_counter; }; static inline struct drm_i915_private *to_i915(const struct drm_device *dev) @@ -1673,6 +1675,7 @@ struct drm_i915_file_private { struct i915_hw_context *private_default_ctx; atomic_t rps_wait_boost; + struct intel_ring_buffer *bsd_ring; }; /* diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 341ec68..720ef17 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -999,6 +999,34 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +/** + * Find one BSD ring to dispatch the corresponding BSD command. + * The Ring ID is returned. + */ +static int gen8_dispatch_bsd_ring(struct drm_device *dev, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct drm_i915_file_private *file_priv = file-driver_priv; + + /* Check whether the file_priv is using one ring */ + if (file_priv-bsd_ring) + return file_priv-bsd_ring-id; + else { + /* If no, use the ping-pong mechanism to select one ring */ + int counter, ring_id; + smp_mb__before_atomic_inc(); + counter = atomic_inc_return(dev_priv-bsd_cmd_counter); + if (counter % 2 == 0) + ring_id = VCS; + else + ring_id = VCS2; + + file_priv-bsd_ring = dev_priv-ring[ring_id]; + return ring_id; + } +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -1043,7 +1071,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if ((args-flags I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) ring = dev_priv-ring[RCS]; - else + else if ((args-flags I915_EXEC_RING_MASK) == I915_EXEC_BSD) { + if (HAS_BSD2(dev)) { + int ring_id; + ring_id = gen8_dispatch_bsd_ring(dev, file); + ring = dev_priv-ring[ring_id]; + } else + ring = dev_priv-ring[VCS]; + } else ring = 
dev_priv-ring[(args-flags I915_EXEC_RING_MASK) - 1]; if (!intel_ring_initialized(ring)) { -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V2 0/6] drm/i915: Add the support of dual BSD rings on BDW GT3
V1-V2: Follow Daniel's comment to do the following update: a. consider the stolen check for BDW in kernel/early-quirks.c in patch 01 b. update the comment in Patch 04 c. use the simple ping-pong mechanism to add the support of dual BSD rings. The further optimization will be considered in another patch set. This is the patch set that tries to add the support of dual BSD rings on BDW GT3. Based on hardware spec, the BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middleware. In such case the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case the different BSD ring is used for video playing back and encoding. Zhao Yakui (6): drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3 drm/i915:Initialize the second BSD ring on BDW GT3 machine drm/i915:Handle the irq interrupt for the second BSD ring drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning drm/i915:Update the restrict check to filter out wrong Ring ID passed by user-space drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3 drivers/gpu/drm/i915/i915_dma.c|3 ++ drivers/gpu/drm/i915/i915_drv.c| 26 +++- drivers/gpu/drm/i915/i915_drv.h|5 +++ drivers/gpu/drm/i915/i915_gem.c|9 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 39 +- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_irq.c|5 ++- drivers/gpu/drm/i915/i915_reg.h|1 + drivers/gpu/drm/i915/intel_ringbuffer.c| 59 drivers/gpu/drm/i915/intel_ringbuffer.h|5 ++- include/drm/i915_pciids.h | 22 --- 11 files changed, 163 insertions(+), 12 deletions(-) -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V2 5/6] drm/i915: Update the restrict check to filter out wrong Ring ID passed by user-space
One extra ring is added in the kernel driver, but it is transparent to the user-space application/middleware. As a result, the number of rings in the kernel driver is larger than the number exported to user-space, so the driver needs to filter out invalid ring IDs passed by user-space. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 +- drivers/gpu/drm/i915/intel_ringbuffer.h|1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3491402..341ec68 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1035,7 +1035,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_IS_PINNED) flags |= I915_DISPATCH_PINNED; - if ((args->flags & I915_EXEC_RING_MASK) > I915_NUM_RINGS) { + if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) { DRM_DEBUG("execbuf with unknown ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8ca4285..59f4cdd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -64,6 +64,7 @@ struct intel_ring_buffer { VCS2, } id; #define I915_NUM_RINGS 5 +#define LAST_USER_RING (VECS + 1) u32 mmio_base; void __iomem *virtual_start; struct drm_device *dev; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 5/5] drm/i915:Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3
On Thu, 2014-04-10 at 00:48 -0600, Daniel Vetter wrote: On Thu, Apr 10, 2014 at 10:24:53AM +0800, Zhao Yakui wrote: On Wed, 2014-04-09 at 08:34 -0600, Daniel Vetter wrote: On Wed, Apr 09, 2014 at 09:59:56AM +0800, Zhao Yakui wrote: The BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middleware. Instead the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case the different BSD ring is used for video playing back and encoding. At the same time the coarse dispatch mechanism can help to avoid the object synchronization between the BSD rings. Signed-off-by: Zhao Yakui yakui.z...@intel.com This looks way too complicated. First things first please get rid of the atomic_t usage. If you don't have _massive_ comments explaining the memory barriers you're most likely using linux kernel atomic_t wrong. They are fully unordered. Thanks for the review. For the atomic_t usage: I will remove it in next version as the counter is already protected by the lock. With that out of the way this still looks a bit complicated really. Can't we just use a very simple static rule in gen8_dispatch_bsd_ring which hashed the pointer address of the file_priv? Just to get things going, once we have a clear need we can try to make things more intelligent. But in case of doubt I really prefer if we start with the dumbest possible approach first and add complexity instead of starting with something really complex and simplifying it. Do you mean that file_priv is hashed and then is mapped to BSD 0 or 1 ring? Yeah, that's the idea. Get in the basic support first, make it fancy like you describe below second. This has a few upsides: - We can concentrate on validating basic support in the first round instead of potentially fighting a bug in the load balancer. 
- Discussions and performance testing for the load balancer won't hold up the entire feature. - Like I've said this might not be required. Before we add more complexity than just hashing the file_priv I want to see some benchmarks of expected workloads that show that the load balancing is indeed a good idea - for the case of a transcode server I guess we should have sufficient in-flight operations that it won't really matter. Or at least I hope so. OK. Understand your concerns. I can split it two steps. One is to add the basic support. The second step is for the optimization. But I don't think that the hash of file_priv is a good idea. As it only has two rings, it is possible that the hash value is always mapped to BSD ring 0. In such case when multiples video clips are played back, the performance can't meet with the requirement.(For example: User can play back 4 1080p video clips concurrently when only one BSD ring is used. On the BDW GT3, they hope to play back 8 1080p video clips concurrently. The poor hash design will cause that all the workload are mapped to one BSD ring and then it can't meet with the requirement). How about using the ping-pong mechanism for the file_priv? For one new fd, it will use BSD ring 0 and then next file_priv will use BSD ring 1. Then BSD ring 0BSD ring 1. Does this make sense to you? So maybe split this patch up into the first step with the basic file_priv hashing mapping and the 2nd patch to add the improved algo? Cheers, Daniel The GT3 machine has two independent BSD rings. It will be better that the kernel driver can balance the video workload between the two rings. When using the hashed file_priv to select BSD ring, the video balance depends on the design of hash design. Under some scenarios, it will be possible that one ring is very busy while another ring is very idle. And then performance of video playing back/encoding will be affected. 
At the same time the hash mechanism is only used to select the corresponding BSD ring when one drm_fd is opened. And it doesn't consider the video workload balance after finishing some workloads. The following is the basic idea in my patch.(A counter variable is added for ring. The bigger the counter, the higher the workload). a. When one new fd needs to dispatch the BSD video command, it will select the ring with the lowest workload(lowest counter). And then counter in this ring will be added. b. when the drm fd is closed(the workload is finished), the counter of the ring used by file_priv will be decreased. c. When the drm fd already selects one BSD ring in previously submitted command, it will check whether it is using the ring with the lowest workload(lowest counter). If not, it can be switched. The purpose is to assure that the workload
Re: [Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3
On Thu, 2014-04-10 at 00:58 -0600, Daniel Vetter wrote: On Thu, Apr 10, 2014 at 11:28:46AM +0800, Zhao Yakui wrote: On Wed, 2014-04-09 at 08:45 -0600, Daniel Vetter wrote: On Wed, Apr 09, 2014 at 09:59:51AM +0800, Zhao Yakui wrote: This is the patch set that tries to add the support of dual BSD rings on BDW GT3. Based on hardware spec, the BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middleware. In such case the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case the different BSD ring is used for video playing back and encoding. At the same time the coarse dispatch mechanism can help to avoid the object synchronization between the BSD rings. Ok, I've quickly read through it all and commented on a few things. Imo the last patch should be massively simplified, at least for the first round. Other things look small. Hi, Daniel Thanks for your review. What's still missing are testcases, and I have two things in mind here: - Exercise the 2nd ring dispatch and sync a bit. Since the 2nd bsd ring is hidden within the kernel I think the right approach would be to open a few drm fds (10 or so) and then randomly use them with a dummy reloc. We have two testcases which can be used as blueprints that need adjustement: - gem_ring_sync_loop: Probably easiest to copy it to a new file as gem_multi_bsd_sync_loop. This test exercises semaphores. - gem_dummy_reloc_loop, subtest mixed: Almost the same as the above, but the sync is done _inside_ the loop and hence this exercises gpu/cpu sync. We need both tests adjusted, for for this we need a new multi-bsd test. Agree with your concerns. I will try to add the gem_multi_bsd_sync_loop/dummy_reloc_loop test case so that it can test the sync with multi-BSD. 
BTW: How about if I directly add multiple fds in gem_ring_sync_loop test case and then test the sync among the different rings? In such case the user-application doesn't need to know the existence of multi-BSD rings. We don't need it for the other rings, so I think it's better to leave the existing tests as-is to avoid introducing bugs. Testing testcase is always fairly hard, since you have to break your kernel to make sure the test still catches bugs ;-) Also for testing VCS1 and VCS2 we need to have multiple fd using the _same_ logical ring exposed to userspace, so the test logic will look a bit different anyway. OK. I will add the separated two test cases for it. - New testcase to fully test main execbuffer flags. This is simply something that's we don't yet have. The next guy to touch execbuf code needs to add it, and it looks like that's you ;-) I've done a JIRA task for the resource streamer work, but I think the resource streamer wont be merged anytime soon. So I'll reassign to you. Jira task is VIZ-3129. For the new testcase of execbuffer flag: Do you have any idea about which kind of exec flag needs to be checked? Do you have any idea about the expected failure/successful behavour for the flags? For example: I915_EXEC_PINNED : If one object is not pinned and submitted, what behavour is expected? Fail or wrong? I've clarified the JIRA, the test is just for the flags/values in the main execbuf structure. And the idea is to do the basic api sanity checking as outlined in my blog post http://blog.ffwll.ch/2013/11/botching-up-ioctls.html i.e. go through all fields in struct drm_i915_gem_execbuffer2 and write a test which checks that the kernel correctly rejects invalid input data. So e.g. for pointer you can supply NULL or a pointer to invalid memory, buffer count is already checked with the overflow tests, but also invalid flags and also making sure that if reserved fields aren't 0 the kernel rejects the batch. 
Of course to be able to check this you first need to construct a valid no-op batch (e.g. copy from gem_exec_nop.c) and submit it (to make sure no one breaks the test later on). Then each subtest only changes the relevant field to make sure the kernel really did check the field (and not just returned -EINVAL due to something else). Some execbuf fields are special and e.g. contexts are not valid when there's no hw context support. If you want to look for examples check out the basic api tests for recently added ioctls like in gem_reset_stats.c. Execbuffer ioctl is simply a bit more complex. OK. I will take a look at your blog and understand what you mentioned. BTW: Does it need to check all the flags defined in i915_drm.h or the exported flag returned by i915_get_parameter? Thanks. Yakui Cheers
Re: [Intel-gfx] [PATCH 5/5] drm/i915:Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3
On Thu, 2014-04-10 at 03:03 -0600, Daniel Vetter wrote: On Thu, Apr 10, 2014 at 04:04:22PM +0800, Zhao Yakui wrote: On Thu, 2014-04-10 at 00:48 -0600, Daniel Vetter wrote: On Thu, Apr 10, 2014 at 10:24:53AM +0800, Zhao Yakui wrote: On Wed, 2014-04-09 at 08:34 -0600, Daniel Vetter wrote: On Wed, Apr 09, 2014 at 09:59:56AM +0800, Zhao Yakui wrote: The BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middleware. Instead the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case the different BSD ring is used for video playing back and encoding. At the same time the coarse dispatch mechanism can help to avoid the object synchronization between the BSD rings. Signed-off-by: Zhao Yakui yakui.z...@intel.com This looks way too complicated. First things first please get rid of the atomic_t usage. If you don't have _massive_ comments explaining the memory barriers you're most likely using linux kernel atomic_t wrong. They are fully unordered. Thanks for the review. For the atomic_t usage: I will remove it in next version as the counter is already protected by the lock. With that out of the way this still looks a bit complicated really. Can't we just use a very simple static rule in gen8_dispatch_bsd_ring which hashed the pointer address of the file_priv? Just to get things going, once we have a clear need we can try to make things more intelligent. But in case of doubt I really prefer if we start with the dumbest possible approach first and add complexity instead of starting with something really complex and simplifying it. Do you mean that file_priv is hashed and then is mapped to BSD 0 or 1 ring? Yeah, that's the idea. Get in the basic support first, make it fancy like you describe below second. 
This has a few upsides: - We can concentrate on validating basic support in the first round instead of potentially fighting a bug in the load balancer. - Discussions and performance testing for the load balancer won't hold up the entire feature. - Like I've said this might not be required. Before we add more complexity than just hashing the file_priv I want to see some benchmarks of expected workloads that show that the load balancing is indeed a good idea - for the case of a transcode server I guess we should have sufficient in-flight operations that it won't really matter. Or at least I hope so. OK. I understand your concerns. I can split it into two steps. One is to add the basic support. The second step is for the optimization. But I don't think that the hash of file_priv is a good idea. As there are only two rings, it is possible that the hash value is always mapped to BSD ring 0. In such a case, when multiple video clips are played back, the performance can't meet the requirement. (For example: a user can play back 4 1080p video clips concurrently when only one BSD ring is used. On the BDW GT3, they hope to play back 8 1080p video clips concurrently. A poor hash design will cause all the workloads to be mapped to one BSD ring, and then it can't meet the requirement.) How about using the ping-pong mechanism for the file_priv? For one new fd, it will use BSD ring 0 and then the next file_priv will use BSD ring 1. Then BSD ring 0 -> BSD ring 1. Does this make sense to you? Well the point of the hash is that it's dumb and simple, but maybe too dumb. If we end up with 3 streams on one vcs and 1 on the other, then we have a good reason to merge the 2nd patch ;-) Hi, Daniel Thanks for your comments. Now we have reached the same point about the support of dual BSD rings on the BDW GT3 machine. So this will be divided into two steps. The first step is to use the simple ping-pong mechanism to add the basic support.
And the second step is for the optimization (balancing video workloads between the two rings). From my point of view, the ping-pong mechanism is simpler and easier to implement. Of course this can also be regarded as a specific kind of hash. Really, the point of the first patch is just so that we have /something/ which uses both rings with a reasonable chance, so that we can get testing and validation off the ground. E.g. in the test I'd use 10 or so drm fds to make sure that at least one of them uses the other ring, in case the hash function isn't great. Understood. We need such a test case to verify it. This is already in my plan. Thanks. Yakui -Daniel ___ Intel-gfx mailing list Intel
Re: [Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3
On Thu, 2014-04-10 at 03:04 -0600, Daniel Vetter wrote: On Thu, Apr 10, 2014 at 04:28:34PM +0800, Zhao Yakui wrote: BTW: Does the test need to check all the flags defined in i915_drm.h, or only the exported flags returned by i915_get_parameter? I don't have i915_get_parameter anywhere in my sources, so no idea what you mean ... Sorry, the function I meant is i915_getparam. It is called by the I915_GETPARAM ioctl to query the flags supported by the driver. Thanks. Yakui -Daniel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/5] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3
On Wed, 2014-04-09 at 08:27 -0600, Daniel Vetter wrote: On Wed, Apr 09, 2014 at 09:59:52AM +0800, Zhao Yakui wrote: Based on the hardware spec, the BDW GT3 has the different configuration with the BDW GT1/GT2. So split the BDW device info definition. This is to do the preparation for adding the Dual BSD rings on BDW GT3 machine. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 24 +++- include/drm/i915_pciids.h | 10 +++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a01faea..609f837 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -279,6 +279,26 @@ static const struct intel_device_info intel_broadwell_m_info = { GEN_DEFAULT_PIPEOFFSETS, }; +static const struct intel_device_info intel_broadwell_gt3d_info = { + .gen = 8, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + +static const struct intel_device_info intel_broadwell_gt3m_info = { + .gen = 8, .is_mobile = 1, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + /* * Make sure any device matches here are from most specific to most * general. 
For example, since the Quanta match is based on the subsystem @@ -312,7 +332,9 @@ static const struct intel_device_info intel_broadwell_m_info = { INTEL_VLV_M_IDS(intel_valleyview_m_info), \ INTEL_VLV_D_IDS(intel_valleyview_d_info), \ INTEL_BDW_M_IDS(intel_broadwell_m_info), \ - INTEL_BDW_D_IDS(intel_broadwell_d_info) + INTEL_BDW_D_IDS(intel_broadwell_d_info), \ + INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \ + INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info) You've forgotten to update the stolen memory quirk table in the x86 code. Just grep for INTEL_BDW_M_IDS to see all users of these macros. Thanks for your info. I will update it in next version. Thanks. Yakui -Daniel static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 940ece4..32d75f8 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -225,12 +225,16 @@ #define INTEL_BDW_M_IDS(info) \ _INTEL_BDW_M_IDS(1, info), \ - _INTEL_BDW_M_IDS(2, info), \ - _INTEL_BDW_M_IDS(3, info) + _INTEL_BDW_M_IDS(2, info) #define INTEL_BDW_D_IDS(info) \ _INTEL_BDW_D_IDS(1, info), \ - _INTEL_BDW_D_IDS(2, info), \ + _INTEL_BDW_D_IDS(2, info) + +#define INTEL_BDW_GT3M_IDS(info) \ + _INTEL_BDW_M_IDS(3, info) + +#define INTEL_BDW_GT3D_IDS(info) \ _INTEL_BDW_D_IDS(3, info) #endif /* _I915_PCIIDS_H */ -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 4/5] drm/i915:Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning
On Wed, 2014-04-09 at 08:29 -0600, Daniel Vetter wrote: On Wed, Apr 09, 2014 at 09:59:55AM +0800, Zhao Yakui wrote: Gen7 doesn't have the second BSD ring, but the compiler emits a switch check warning during compilation. So just add the VCS2 case to remove the switch check warning. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.c |1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 11d0687..43e0227 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -984,6 +984,7 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) case BCS: mmio = BLT_HWS_PGA_GEN7; break; + case VCS2: Maybe add a /* doesn't actually exist but shuts up gcc */ comment? Makes sense. I will update it. Thanks. Yakui -Daniel case VCS: mmio = BSD_HWS_PGA_GEN7; break; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 5/5] drm/i915:Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3
On Wed, 2014-04-09 at 08:34 -0600, Daniel Vetter wrote: On Wed, Apr 09, 2014 at 09:59:56AM +0800, Zhao Yakui wrote: The BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middleware. Instead the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case the different BSD ring is used for video playing back and encoding. At the same time the coarse dispatch mechanism can help to avoid the object synchronization between the BSD rings. Signed-off-by: Zhao Yakui yakui.z...@intel.com This looks way too complicated. First things first please get rid of the atomic_t usage. If you don't have _massive_ comments explaining the memory barriers you're most likely using linux kernel atomic_t wrong. They are fully unordered. Thanks for the review. For the atomic_t usage: I will remove it in next version as the counter is already protected by the lock. With that out of the way this still looks a bit complicated really. Can't we just use a very simple static rule in gen8_dispatch_bsd_ring which hashed the pointer address of the file_priv? Just to get things going, once we have a clear need we can try to make things more intelligent. But in case of doubt I really prefer if we start with the dumbest possible approach first and add complexity instead of starting with something really complex and simplifying it. Do you mean that file_priv is hashed and then is mapped to BSD 0 or 1 ring? The GT3 machine has two independent BSD rings. It will be better that the kernel driver can balance the video workload between the two rings. When using the hashed file_priv to select BSD ring, the video balance depends on the design of hash design. Under some scenarios, it will be possible that one ring is very busy while another ring is very idle. 
And then performance of video playing back/encoding will be affected. At the same time the hash mechanism is only used to select the corresponding BSD ring when one drm_fd is opened. And it doesn't consider the video workload balance after finishing some workloads. The following is the basic idea in my patch.(A counter variable is added for ring. The bigger the counter, the higher the workload). a. When one new fd needs to dispatch the BSD video command, it will select the ring with the lowest workload(lowest counter). And then counter in this ring will be added. b. when the drm fd is closed(the workload is finished), the counter of the ring used by file_priv will be decreased. c. When the drm fd already selects one BSD ring in previously submitted command, it will check whether it is using the ring with the lowest workload(lowest counter). If not, it can be switched. The purpose is to assure that the workload is still balanced between the two BSD rings. For example: User wants to play back four video clips. BSD 0 ring is selected to play back the two long clips. BSD 1 ring is selected to play back the two short clips. After it finishes the playing back of two short clips, the BSD 1 ring can be switched to play back the long clip. Still balance. What do you think? 
-Daniel --- drivers/gpu/drm/i915/i915_dma.c| 14 ++ drivers/gpu/drm/i915/i915_drv.h|3 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 73 +++- drivers/gpu/drm/i915/intel_ringbuffer.c|2 + drivers/gpu/drm/i915/intel_ringbuffer.h|2 + 5 files changed, 93 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0b38f88..8260463 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(dev_priv-backlight_lock); spin_lock_init(dev_priv-uncore.lock); spin_lock_init(dev_priv-mm.object_stat_lock); + spin_lock_init(dev_priv-bsd_lock); mutex_init(dev_priv-dpio_lock); mutex_init(dev_priv-modeset_restore_lock); @@ -1928,7 +1929,20 @@ void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv) void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file-driver_priv; + struct intel_ring_buffer *bsd_ring; + struct drm_i915_private *dev_priv = dev-dev_private; + if (file_priv file_priv-bsd_ring) { + int cmd_counter; + bsd_ring = file_priv-bsd_ring; + file_priv-bsd_ring = NULL; + spin_lock(dev_priv-bsd_lock); + cmd_counter = atomic_sub_return(1, bsd_ring-bsd_cmd_counter); + if (cmd_counter 0) { + atomic_set(bsd_ring
Re: [Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3
On Wed, 2014-04-09 at 08:45 -0600, Daniel Vetter wrote: On Wed, Apr 09, 2014 at 09:59:51AM +0800, Zhao Yakui wrote: This is the patch set that tries to add the support of dual BSD rings on BDW GT3. Based on hardware spec, the BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middleware. In such case the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case the different BSD ring is used for video playing back and encoding. At the same time the coarse dispatch mechanism can help to avoid the object synchronization between the BSD rings. Ok, I've quickly read through it all and commented on a few things. Imo the last patch should be massively simplified, at least for the first round. Other things look small. Hi, Daniel Thanks for your review. What's still missing are testcases, and I have two things in mind here: - Exercise the 2nd ring dispatch and sync a bit. Since the 2nd bsd ring is hidden within the kernel I think the right approach would be to open a few drm fds (10 or so) and then randomly use them with a dummy reloc. We have two testcases which can be used as blueprints that need adjustement: - gem_ring_sync_loop: Probably easiest to copy it to a new file as gem_multi_bsd_sync_loop. This test exercises semaphores. - gem_dummy_reloc_loop, subtest mixed: Almost the same as the above, but the sync is done _inside_ the loop and hence this exercises gpu/cpu sync. We need both tests adjusted, for for this we need a new multi-bsd test. Agree with your concerns. I will try to add the gem_multi_bsd_sync_loop/dummy_reloc_loop test case so that it can test the sync with multi-BSD. BTW: How about if I directly add multiple fds in gem_ring_sync_loop test case and then test the sync among the different rings? 
In such case the user-application doesn't need to know the existence of multi-BSD rings. - New testcase to fully test main execbuffer flags. This is simply something that's we don't yet have. The next guy to touch execbuf code needs to add it, and it looks like that's you ;-) I've done a JIRA task for the resource streamer work, but I think the resource streamer wont be merged anytime soon. So I'll reassign to you. Jira task is VIZ-3129. For the new testcase of execbuffer flag: Do you have any idea about which kind of exec flag needs to be checked? Do you have any idea about the expected failure/successful behavour for the flags? For example: I915_EXEC_PINNED : If one object is not pinned and submitted, what behavour is expected? Fail or wrong? Thanks. Yakui Thanks, Daniel Zhao Yakui (5): drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3 drm/i915: Initialize the second BSD ring on BDW GT3 machine drm/i915: Handle the irq interrupt for the second BSD ring drm/i915: Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning drm/i915: Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3 drivers/gpu/drm/i915/i915_dma.c| 14 ++ drivers/gpu/drm/i915/i915_drv.c| 24 - drivers/gpu/drm/i915/i915_drv.h|5 ++ drivers/gpu/drm/i915/i915_gem.c|9 +++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 73 +++- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_irq.c|5 +- drivers/gpu/drm/i915/i915_reg.h|1 + drivers/gpu/drm/i915/intel_ringbuffer.c| 57 ++ drivers/gpu/drm/i915/intel_ringbuffer.h|6 ++- include/drm/i915_pciids.h | 10 ++-- 11 files changed, 197 insertions(+), 8 deletions(-) -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 5/5] drm/i915: Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3
The BDW GT3 has two independent BSD rings, which can be used to process the video commands. To be simpler, it is transparent to user-space driver/middleware. Instead the kernel driver will decide which ring is to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In such case the different BSD ring is used for video playing back and encoding. At the same time the coarse dispatch mechanism can help to avoid the object synchronization between the BSD rings. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_dma.c| 14 ++ drivers/gpu/drm/i915/i915_drv.h|3 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 73 +++- drivers/gpu/drm/i915/intel_ringbuffer.c|2 + drivers/gpu/drm/i915/intel_ringbuffer.h|2 + 5 files changed, 93 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0b38f88..8260463 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(dev_priv-backlight_lock); spin_lock_init(dev_priv-uncore.lock); spin_lock_init(dev_priv-mm.object_stat_lock); + spin_lock_init(dev_priv-bsd_lock); mutex_init(dev_priv-dpio_lock); mutex_init(dev_priv-modeset_restore_lock); @@ -1928,7 +1929,20 @@ void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv) void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file-driver_priv; + struct intel_ring_buffer *bsd_ring; + struct drm_i915_private *dev_priv = dev-dev_private; + if (file_priv file_priv-bsd_ring) { + int cmd_counter; + bsd_ring = file_priv-bsd_ring; + file_priv-bsd_ring = NULL; + spin_lock(dev_priv-bsd_lock); + cmd_counter = atomic_sub_return(1, bsd_ring-bsd_cmd_counter); + if (cmd_counter 0) { + atomic_set(bsd_ring-bsd_cmd_counter, 0); + } + 
spin_unlock(dev_priv-bsd_lock); + } kfree(file_priv); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d77f4e0..128639c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1457,6 +1457,8 @@ struct drm_i915_private { struct i915_dri1_state dri1; /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; + /* the lock for dispatch video commands on two BSD rings */ + spinlock_t bsd_lock; }; static inline struct drm_i915_private *to_i915(const struct drm_device *dev) @@ -1664,6 +1666,7 @@ struct drm_i915_file_private { struct i915_hw_context *private_default_ctx; atomic_t rps_wait_boost; + struct intel_ring_buffer *bsd_ring; }; /* diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3491402..75d8cc0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -999,6 +999,70 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +/** + * Find one BSD ring to dispatch the corresponding BSD command. + * The Ring ID is returned. 
+ */ +static int gen8_dispatch_bsd_ring(struct drm_device *dev, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct drm_i915_file_private *file_priv = file-driver_priv; + struct intel_ring_buffer *temp_ring, *bsd_ring; + int bsd_counter, temp_counter; + + if (file_priv-bsd_ring) { + /* Check whether the load balance is required.*/ + spin_lock(dev_priv-bsd_lock); + bsd_counter = atomic_read((file_priv-bsd_ring-bsd_cmd_counter)); + temp_ring = dev_priv-ring[VCS]; + temp_counter = atomic_read(temp_ring-bsd_cmd_counter); + bsd_ring = dev_priv-ring[VCS]; + + temp_ring = dev_priv-ring[VCS2]; + if (atomic_read(temp_ring-bsd_cmd_counter) temp_counter) { + temp_counter = atomic_read(temp_ring-bsd_cmd_counter); + bsd_ring = temp_ring; + } + /* +* If it is already the ring with the minimum load, it is +* unnecessary to switch it. +*/ + if (bsd_ring == file_priv-bsd_ring) { + spin_unlock(dev_priv-bsd_lock); + return bsd_ring-id; + } + /* +* If the load delta between current ring and target ring
[Intel-gfx] [PATCH 3/5] drm/i915: Handle the irq interrupt for the second BSD ring
Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_irq.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index bdda3b5..d5b1dd3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1347,13 +1347,16 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, DRM_ERROR(The master control interrupt lied (GT0)!\n); } - if (master_ctl GEN8_GT_VCS1_IRQ) { + if (master_ctl (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { tmp = I915_READ(GEN8_GT_IIR(1)); if (tmp) { ret = IRQ_HANDLED; vcs = tmp GEN8_VCS1_IRQ_SHIFT; if (vcs GT_RENDER_USER_INTERRUPT) notify_ring(dev, dev_priv-ring[VCS]); + vcs = tmp GEN8_VCS2_IRQ_SHIFT; + if (vcs GT_RENDER_USER_INTERRUPT) + notify_ring(dev, dev_priv-ring[VCS2]); I915_WRITE(GEN8_GT_IIR(1), tmp); } else DRM_ERROR(The master control interrupt lied (GT1)!\n); -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 0/5] drm/i915: Add the support of dual BSD rings on BDW GT3
This patch set adds support for dual BSD rings on BDW GT3. Based on the hardware spec, the BDW GT3 has two independent BSD rings, which can be used to process the video commands. To keep things simple, this is transparent to the user-space driver/middleware. Instead, the kernel driver decides which ring is used to dispatch the BSD video command. As every BSD ring is powerful, it is enough to dispatch the BSD video command based on the drm fd. In this way different BSD rings can be used for video playback and encoding. At the same time the coarse dispatch mechanism helps to avoid object synchronization between the BSD rings. Zhao Yakui (5): drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3 drm/i915: Initialize the second BSD ring on BDW GT3 machine drm/i915: Handle the irq interrupt for the second BSD ring drm/i915: Add the VCS2 switch in Intel_ring_setup_status_page for Gen7 to remove the switch check warning drm/i915: Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3 drivers/gpu/drm/i915/i915_dma.c| 14 ++ drivers/gpu/drm/i915/i915_drv.c| 24 - drivers/gpu/drm/i915/i915_drv.h|5 ++ drivers/gpu/drm/i915/i915_gem.c|9 +++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 73 +++- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_irq.c|5 +- drivers/gpu/drm/i915/i915_reg.h|1 + drivers/gpu/drm/i915/intel_ringbuffer.c| 57 ++ drivers/gpu/drm/i915/intel_ringbuffer.h|6 ++- include/drm/i915_pciids.h | 10 ++-- 11 files changed, 197 insertions(+), 8 deletions(-) -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/5] drm/i915: Initialize the second BSD ring on BDW GT3 machine
Based on the hardware spec, the BDW GT3 machine has two independent BSD ring that can be used to dispatch the video commands. So just initialize it. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c |4 +-- drivers/gpu/drm/i915/i915_drv.h |2 ++ drivers/gpu/drm/i915/i915_gem.c |9 +- drivers/gpu/drm/i915/i915_gpu_error.c |1 + drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 54 +++ drivers/gpu/drm/i915/intel_ringbuffer.h |4 ++- 7 files changed, 71 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 609f837..10941c5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -282,7 +282,7 @@ static const struct intel_device_info intel_broadwell_m_info = { static const struct intel_device_info intel_broadwell_gt3d_info = { .gen = 8, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, @@ -292,7 +292,7 @@ static const struct intel_device_info intel_broadwell_gt3d_info = { static const struct intel_device_info intel_broadwell_gt3m_info = { .gen = 8, .is_mobile = 1, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, .has_llc = 1, .has_ddi = 1, .has_fbc = 1, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 55addaa..d77f4e0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1817,7 +1817,9 @@ struct drm_i915_cmd_table { #define BSD_RING (1VCS) #define BLT_RING (1BCS) #define VEBOX_RING (1VECS) +#define BSD2_RING (1VCS2) #define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask BSD_RING) +#define HAS_BSD2(dev) (INTEL_INFO(dev)-ring_mask BSD2_RING) #define 
HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask BLT_RING) #define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask VEBOX_RING) #define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c70121d..1756276 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4372,13 +4372,20 @@ static int i915_gem_init_rings(struct drm_device *dev) goto cleanup_blt_ring; } + if (HAS_BSD2(dev)) { + ret = intel_init_bsd2_ring_buffer(dev); + if (ret) + goto cleanup_vebox_ring; + } ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); if (ret) - goto cleanup_vebox_ring; + goto cleanup_ring; return 0; +cleanup_ring: + intel_cleanup_ring_buffer(dev_priv-ring[VCS2]); cleanup_vebox_ring: intel_cleanup_ring_buffer(dev_priv-ring[VECS]); cleanup_blt_ring: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 1005af0..f6d21b3 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -42,6 +42,7 @@ static const char *ring_str(int ring) case VCS: return bsd; case BCS: return blt; case VECS: return vebox; + case VCS2: return second bsd; default: return ; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8e60737..8f5c103 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -750,6 +750,7 @@ enum punit_power_well { #define RENDER_RING_BASE 0x02000 #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 +#define GEN8_BSD2_RING_BASE0x1c000 #define VEBOX_RING_BASE0x1a000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base)((base)+0x30) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3d76ce1..11d0687 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1920,10 +1920,12 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring-semaphore_register[VCS] = 
MI_SEMAPHORE_SYNC_RV; ring-semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB; ring-semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE; + ring-semaphore_register[VCS2] = MI_SEMAPHORE_SYNC_INVALID; ring-signal_mbox[RCS] = GEN6_NOSYNC; ring-signal_mbox[VCS] = GEN6_VRSYNC
[Intel-gfx] [PATCH 4/5] drm/i915: Add the VCS2 switch in intel_ring_setup_status_page for Gen7 to remove the switch check warning
Gen7 doesn't have the second BSD ring, but the compiler complains with a switch check warning during compilation when VCS2 is not handled. So just add the VCS2 case to remove the switch check warning. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.c |1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 11d0687..43e0227 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -984,6 +984,7 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) case BCS: mmio = BLT_HWS_PGA_GEN7; break; + case VCS2: case VCS: mmio = BSD_HWS_PGA_GEN7; break; -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/5] drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3
Based on the hardware spec, the BDW GT3 has a different configuration from the BDW GT1/GT2, so split the BDW device info definition. This prepares for adding the dual BSD rings on BDW GT3 machines. Signed-off-by: Zhao Yakui yakui.z...@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 24 +++- include/drm/i915_pciids.h | 10 +++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a01faea..609f837 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -279,6 +279,26 @@ static const struct intel_device_info intel_broadwell_m_info = { GEN_DEFAULT_PIPEOFFSETS, }; +static const struct intel_device_info intel_broadwell_gt3d_info = { + .gen = 8, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + +static const struct intel_device_info intel_broadwell_gt3m_info = { + .gen = 8, .is_mobile = 1, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + /* * Make sure any device matches here are from most specific to most * general.
For example, since the Quanta match is based on the subsystem @@ -312,7 +332,9 @@ static const struct intel_device_info intel_broadwell_m_info = { INTEL_VLV_M_IDS(intel_valleyview_m_info), \ INTEL_VLV_D_IDS(intel_valleyview_d_info), \ INTEL_BDW_M_IDS(intel_broadwell_m_info), \ - INTEL_BDW_D_IDS(intel_broadwell_d_info) + INTEL_BDW_D_IDS(intel_broadwell_d_info), \ + INTEL_BDW_GT3M_IDS(intel_broadwell_gt3m_info), \ + INTEL_BDW_GT3D_IDS(intel_broadwell_gt3d_info) static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 940ece4..32d75f8 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -225,12 +225,16 @@ #define INTEL_BDW_M_IDS(info) \ _INTEL_BDW_M_IDS(1, info), \ - _INTEL_BDW_M_IDS(2, info), \ - _INTEL_BDW_M_IDS(3, info) + _INTEL_BDW_M_IDS(2, info) #define INTEL_BDW_D_IDS(info) \ _INTEL_BDW_D_IDS(1, info), \ - _INTEL_BDW_D_IDS(2, info), \ + _INTEL_BDW_D_IDS(2, info) + +#define INTEL_BDW_GT3M_IDS(info) \ + _INTEL_BDW_M_IDS(3, info) + +#define INTEL_BDW_GT3D_IDS(info) \ _INTEL_BDW_D_IDS(3, info) #endif /* _I915_PCIIDS_H */ -- 1.7.10.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx