Re: [Intel-gfx] [PATCH 12/26] drm/i915/guc: Implement multi-lrc submission
On Fri, Oct 08, 2021 at 10:20:24AM -0700, John Harrison wrote: > On 10/4/2021 15:06, Matthew Brost wrote: > > Implement multi-lrc submission via a single workqueue entry and single > > H2G. The workqueue entry contains an updated tail value for each > > request, of all the contexts in the multi-lrc submission, and updates > > these values simultaneously. As such, the tasklet and bypass path have > > been updated to coalesce requests into a single submission. > > > > v2: > > (John Harrison) > >- s/wqe/wqi > >- Use FIELD_PREP macros > >- Add GEM_BUG_ONs ensures length fits within field > >- Add comment / white space to intel_guc_write_barrier > > (Kernel test robot) > >- Make need_tasklet a static function > > > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gt/uc/intel_guc.c| 26 ++ > > drivers/gpu/drm/i915/gt/uc/intel_guc.h| 8 + > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 +- > > drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 23 +- > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 319 -- > > drivers/gpu/drm/i915/i915_request.h | 8 + > > 6 files changed, 335 insertions(+), 73 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c > > b/drivers/gpu/drm/i915/gt/uc/intel_guc.c > > index 8f8182bf7c11..7191e8439290 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c > > @@ -756,3 +756,29 @@ void intel_guc_load_status(struct intel_guc *guc, > > struct drm_printer *p) > > } > > } > > } > > + > > +void intel_guc_write_barrier(struct intel_guc *guc) > > +{ > > + struct intel_gt *gt = guc_to_gt(guc); > > + > > + if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { > > + /* > > +* Ensure intel_uncore_write_fw can be used rather than > > +* intel_uncore_write. > > +*/ > > + GEM_BUG_ON(guc->send_regs.fw_domains); > > + > > + /* > > +* This register is used by the i915 and GuC for MMIO based > > +* communication. Once we are in this code CTBs are the only > > +* method the i915 uses to communicate with the GuC so it is > > +* safe to write to this register (a value of 0 is NOP for MMIO > > +* communication). If we ever start mixing CTBs and MMIOs a new > > +* register will have to be chosen. > > +*/ > Hmm, missed it before but this comment is very CTB centric and the barrier > function is now being used for parallel submission work queues. Seems like > an extra comment should be added to cover that case. Just something simple > about WQ usage is also guaranteed to be post CTB switch over. > Sure. > > + intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); > > + } else { > > + /* wmb() sufficient for a barrier if in smem */ > > + wmb(); > > + } > > +} > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > index a9f4ec972bfb..147f39cc0f2f 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > @@ -46,6 +46,12 @@ struct intel_guc { > > * submitted until the stalled request is processed. > > */ > > struct i915_request *stalled_request; > > + enum { > > + STALL_NONE, > > + STALL_REGISTER_CONTEXT, > > + STALL_MOVE_LRC_TAIL, > > + STALL_ADD_REQUEST, > > + } submission_stall_reason; > > /* intel_guc_recv interrupt related state */ > > /** @irq_lock: protects GuC irq state */ > > @@ -361,4 +367,6 @@ void intel_guc_submission_cancel_requests(struct > > intel_guc *guc); > > void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); > > +void intel_guc_write_barrier(struct intel_guc *guc); > > + > > #endif > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > index 20c710a74498..10d1878d2826 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > @@ -377,28 +377,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) > > return ++ct->requests.last_fence; > > } > > -static void write_barrier(struct intel_guc_ct *ct) > > -{ > > - struct intel_guc *guc = ct_to_guc(ct); > > - struct intel_gt *gt = guc_to_gt(guc); > > - > > - if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { > > - GEM_BUG_ON(guc->send_regs.fw_domains); > > - /* > > -* This register is used by the i915 and GuC for MMIO based > > -* communication. Once we are in this code CTBs are the only > > -* method the i915 uses to communicate with the GuC so it is > > -* safe to write to this register (a value of 0 is NOP for MMIO > > -* communication). If we ever start mixing CTBs and MMIOs
Re: [Intel-gfx] [PATCH 12/26] drm/i915/guc: Implement multi-lrc submission
On 10/4/2021 15:06, Matthew Brost wrote: Implement multi-lrc submission via a single workqueue entry and single H2G. The workqueue entry contains an updated tail value for each request, of all the contexts in the multi-lrc submission, and updates these values simultaneously. As such, the tasklet and bypass path have been updated to coalesce requests into a single submission. v2: (John Harrison) - s/wqe/wqi - Use FIELD_PREP macros - Add GEM_BUG_ONs ensures length fits within field - Add comment / white space to intel_guc_write_barrier (Kernel test robot) - Make need_tasklet a static function Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 26 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.h| 8 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 +- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 23 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 319 -- drivers/gpu/drm/i915/i915_request.h | 8 + 6 files changed, 335 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 8f8182bf7c11..7191e8439290 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -756,3 +756,29 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p) } } } + +void intel_guc_write_barrier(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { + /* +* Ensure intel_uncore_write_fw can be used rather than +* intel_uncore_write. +*/ + GEM_BUG_ON(guc->send_regs.fw_domains); + + /* +* This register is used by the i915 and GuC for MMIO based +* communication. Once we are in this code CTBs are the only +* method the i915 uses to communicate with the GuC so it is +* safe to write to this register (a value of 0 is NOP for MMIO +* communication). If we ever start mixing CTBs and MMIOs a new +* register will have to be chosen. +*/ Hmm, missed it before but this comment is very CTB centric and the barrier function is now being used for parallel submission work queues. Seems like an extra comment should be added to cover that case. Just something simple about WQ usage is also guaranteed to be post CTB switch over. + intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); + } else { + /* wmb() sufficient for a barrier if in smem */ + wmb(); + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index a9f4ec972bfb..147f39cc0f2f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -46,6 +46,12 @@ struct intel_guc { * submitted until the stalled request is processed. */ struct i915_request *stalled_request; + enum { + STALL_NONE, + STALL_REGISTER_CONTEXT, + STALL_MOVE_LRC_TAIL, + STALL_ADD_REQUEST, + } submission_stall_reason; /* intel_guc_recv interrupt related state */ /** @irq_lock: protects GuC irq state */ @@ -361,4 +367,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); +void intel_guc_write_barrier(struct intel_guc *guc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 20c710a74498..10d1878d2826 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -377,28 +377,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) return ++ct->requests.last_fence; } -static void write_barrier(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_gt *gt = guc_to_gt(guc); - - if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { - GEM_BUG_ON(guc->send_regs.fw_domains); - /* -* This register is used by the i915 and GuC for MMIO based -* communication. Once we are in this code CTBs are the only -* method the i915 uses to communicate with the GuC so it is -* safe to write to this register (a value of 0 is NOP for MMIO -* communication). If we ever start mixing CTBs and MMIOs a new -* register will have to be chosen. -*/ - intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); - } else { - /* wmb() sufficient for a barrier if in smem */ - wmb(); - } -} - static int ct_write(struct
Re: [Intel-gfx] [PATCH 12/26] drm/i915/guc: Implement multi-lrc submission
Hi Matthew, Thank you for the patch! Yet something to improve: [auto build test ERROR on drm-tip/drm-tip] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master airlied/drm-next v5.15-rc3 next-20210922] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Matthew-Brost/Parallel-submission-aka-multi-bb-execbuf/20211005-061424 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: x86_64-randconfig-a003-20211004 (attached as .config) compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project c0039de2953d15815448b4b3c3bafb45607781e0) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/3bea3cc438df1105d0d8c1bcc01b96559d4bb78c git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Matthew-Brost/Parallel-submission-aka-multi-bb-execbuf/20211005-061424 git checkout 3bea3cc438df1105d0d8c1bcc01b96559d4bb78c # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All errors (new ones prefixed by >>): >> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:1486:7: error: variable >> 'ret' is used uninitialized whenever 'if' condition is false >> [-Werror,-Wsometimes-uninitialized] if (multi_lrc_submit(rq)) { ^~~~ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:1499:9: note: uninitialized use occurs here return ret; ^~~ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:1486:3: note: remove the 'if' if its condition is always true if (multi_lrc_submit(rq)) { ^~ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:1479:9: note: initialize the variable 'ret' to silence this warning int ret; ^ = 0 1 error generated. vim +1486 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 1475 1476 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1477 struct i915_request *rq) 1478 { 1479 int ret; 1480 1481 __i915_request_submit(rq); 1482 1483 trace_i915_request_in(rq, 0); 1484 1485 if (is_multi_lrc_rq(rq)) { > 1486 if (multi_lrc_submit(rq)) { 1487 ret = guc_wq_item_append(guc, rq); 1488 if (!ret) 1489 ret = guc_add_request(guc, rq); 1490 } 1491 } else { 1492 guc_set_lrc_tail(rq); 1493 ret = guc_add_request(guc, rq); 1494 } 1495 1496 if (unlikely(ret == -EPIPE)) 1497 disable_submission(guc); 1498 1499 return ret; 1500 } 1501 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org .config.gz Description: application/gzip
Re: [Intel-gfx] [PATCH 12/26] drm/i915/guc: Implement multi-lrc submission
Hi Matthew, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master airlied/drm-next v5.15-rc3 next-20210922] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Matthew-Brost/Parallel-submission-aka-multi-bb-execbuf/20211005-061424 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-randconfig-a004-20211004 (attached as .config) compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project c0039de2953d15815448b4b3c3bafb45607781e0) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/3bea3cc438df1105d0d8c1bcc01b96559d4bb78c git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Matthew-Brost/Parallel-submission-aka-multi-bb-execbuf/20211005-061424 git checkout 3bea3cc438df1105d0d8c1bcc01b96559d4bb78c # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=i386 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): >> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:1486:7: warning: variable >> 'ret' is used uninitialized whenever 'if' condition is false >> [-Wsometimes-uninitialized] if (multi_lrc_submit(rq)) { ^~~~ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:1499:9: note: uninitialized use occurs here return ret; ^~~ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:1486:3: note: remove the 'if' if its condition is always true if (multi_lrc_submit(rq)) { ^~ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:1479:9: note: initialize the variable 'ret' to silence this warning int ret; ^ = 0 1 warning generated. vim +1486 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 1475 1476 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1477 struct i915_request *rq) 1478 { 1479 int ret; 1480 1481 __i915_request_submit(rq); 1482 1483 trace_i915_request_in(rq, 0); 1484 1485 if (is_multi_lrc_rq(rq)) { > 1486 if (multi_lrc_submit(rq)) { 1487 ret = guc_wq_item_append(guc, rq); 1488 if (!ret) 1489 ret = guc_add_request(guc, rq); 1490 } 1491 } else { 1492 guc_set_lrc_tail(rq); 1493 ret = guc_add_request(guc, rq); 1494 } 1495 1496 if (unlikely(ret == -EPIPE)) 1497 disable_submission(guc); 1498 1499 return ret; 1500 } 1501 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org .config.gz Description: application/gzip
[Intel-gfx] [PATCH 12/26] drm/i915/guc: Implement multi-lrc submission
Implement multi-lrc submission via a single workqueue entry and single H2G. The workqueue entry contains an updated tail value for each request, of all the contexts in the multi-lrc submission, and updates these values simultaneously. As such, the tasklet and bypass path have been updated to coalesce requests into a single submission. v2: (John Harrison) - s/wqe/wqi - Use FIELD_PREP macros - Add GEM_BUG_ONs ensures length fits within field - Add comment / white space to intel_guc_write_barrier (Kernel test robot) - Make need_tasklet a static function Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 26 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.h| 8 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 +- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 23 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 319 -- drivers/gpu/drm/i915/i915_request.h | 8 + 6 files changed, 335 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 8f8182bf7c11..7191e8439290 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -756,3 +756,29 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p) } } } + +void intel_guc_write_barrier(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { + /* +* Ensure intel_uncore_write_fw can be used rather than +* intel_uncore_write. +*/ + GEM_BUG_ON(guc->send_regs.fw_domains); + + /* +* This register is used by the i915 and GuC for MMIO based +* communication. Once we are in this code CTBs are the only +* method the i915 uses to communicate with the GuC so it is +* safe to write to this register (a value of 0 is NOP for MMIO +* communication). If we ever start mixing CTBs and MMIOs a new +* register will have to be chosen. +*/ + intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); + } else { + /* wmb() sufficient for a barrier if in smem */ + wmb(); + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index a9f4ec972bfb..147f39cc0f2f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -46,6 +46,12 @@ struct intel_guc { * submitted until the stalled request is processed. */ struct i915_request *stalled_request; + enum { + STALL_NONE, + STALL_REGISTER_CONTEXT, + STALL_MOVE_LRC_TAIL, + STALL_ADD_REQUEST, + } submission_stall_reason; /* intel_guc_recv interrupt related state */ /** @irq_lock: protects GuC irq state */ @@ -361,4 +367,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); +void intel_guc_write_barrier(struct intel_guc *guc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 20c710a74498..10d1878d2826 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -377,28 +377,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) return ++ct->requests.last_fence; } -static void write_barrier(struct intel_guc_ct *ct) -{ - struct intel_guc *guc = ct_to_guc(ct); - struct intel_gt *gt = guc_to_gt(guc); - - if (i915_gem_object_is_lmem(guc->ct.vma->obj)) { - GEM_BUG_ON(guc->send_regs.fw_domains); - /* -* This register is used by the i915 and GuC for MMIO based -* communication. Once we are in this code CTBs are the only -* method the i915 uses to communicate with the GuC so it is -* safe to write to this register (a value of 0 is NOP for MMIO -* communication). If we ever start mixing CTBs and MMIOs a new -* register will have to be chosen. -*/ - intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0); - } else { - /* wmb() sufficient for a barrier if in smem */ - wmb(); - } -} - static int ct_write(struct intel_guc_ct *ct, const u32 *action, u32 len /* in dwords */, @@ -468,7 +446,7 @@ static int ct_write(struct intel_guc_ct *ct, * make sure H2G buffer update and LRC tail update (if this triggering a * submission) are visible before updating the descriptor tail */ - write_barrier(ct); +