"Mcaulay, Alistair" <alistair.mcau...@intel.com> writes:

> Hi Mika,
>
> Can you please review this patch and verify that it fixes the issues raised in your
> previous review?
>
> Thanks,
> Alistair.
>
>> -----Original Message-----
>> From: Mcaulay, Alistair
>> Sent: Friday, August 15, 2014 6:52 PM
>> To: intel-gfx@lists.freedesktop.org
>> Cc: Mcaulay, Alistair
>> Subject: [PATCH v3] drm/i915: Rework GPU reset sequence to match driver
>> load & thaw
>> 
>> From: "McAulay, Alistair" <alistair.mcau...@intel.com>
>> 
>> This patch is to address Daniel's concerns over different code during reset:
>> 
>> http://lists.freedesktop.org/archives/intel-gfx/2014-June/047758.html
>> 
>> "The reason for aiming as hard as possible to use the exact same code for
>> driver load, gpu reset and runtime pm/system resume is that we've simply
>> seen too many bugs due to slight variations and unintended omissions."
>> 
>> Tested using igt drv_hangman.
>> 
>> V2: Cleaner way of preventing check_wedge returning -EAGAIN
>> V3: Clean the last_context during reset, to ensure do_switch() does the
>> MI_SET_CONTEXT. As per review.
>> Signed-off-by: McAulay, Alistair <alistair.mcau...@intel.com>
>> ---

Reviewed-by: Mika Kuoppala <mika.kuopp...@intel.com>

>>  drivers/gpu/drm/i915/i915_drv.c         |  6 +++
>>  drivers/gpu/drm/i915/i915_drv.h         |  3 ++
>>  drivers/gpu/drm/i915/i915_gem.c         |  4 +-
>>  drivers/gpu/drm/i915/i915_gem_context.c | 33 +++-------------
>>  drivers/gpu/drm/i915/i915_gem_gtt.c     | 67 
>> +++++----------------------------
>>  drivers/gpu/drm/i915/i915_gem_gtt.h     |  3 +-
>>  6 files changed, 28 insertions(+), 88 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c
>> b/drivers/gpu/drm/i915/i915_drv.c index 5e4fefd..3bfafe6 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -806,7 +806,13 @@ int i915_reset(struct drm_device *dev)
>>                      !dev_priv->ums.mm_suspended) {
>>              dev_priv->ums.mm_suspended = 0;
>> 
>> +            /* Used to prevent gem_check_wedged returning -EAGAIN
>> during gpu reset */
>> +            dev_priv->gpu_error.reload_in_reset = true;
>> +
>>              ret = i915_gem_init_hw(dev);
>> +
>> +            dev_priv->gpu_error.reload_in_reset = false;
>> +
>>              mutex_unlock(&dev->struct_mutex);
>>              if (ret) {
>>                      DRM_ERROR("Failed hw init on reset %d\n", ret); diff
>> --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 991b663..116daff 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1217,6 +1217,9 @@ struct i915_gpu_error {
>> 
>>      /* For missed irq/seqno simulation. */
>>      unsigned int test_irq_rings;
>> +
>> +    /* Used to prevent gem_check_wedged returning -EAGAIN during
>> gpu reset   */
>> +    bool reload_in_reset;
>>  };
>> 
>>  enum modeset_restore {
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c index ef047bc..e7396eb 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -1085,7 +1085,9 @@ i915_gem_check_wedge(struct i915_gpu_error
>> *error,
>>              if (i915_terminally_wedged(error))
>>                      return -EIO;
>> 
>> -            return -EAGAIN;
>> +            /* Check if GPU Reset is in progress */
>> +            if (!error->reload_in_reset)
>> +                    return -EAGAIN;
>>      }
>> 
>>      return 0;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
>> b/drivers/gpu/drm/i915/i915_gem_context.c
>> index de72a28..9378ad8 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -377,34 +377,17 @@ void i915_gem_context_reset(struct drm_device
>> *dev)
>>      struct drm_i915_private *dev_priv = dev->dev_private;
>>      int i;
>> 
>> -    /* Prevent the hardware from restoring the last context (which
>> hung) on
>> -     * the next switch */
>>      for (i = 0; i < I915_NUM_RINGS; i++) {
>>              struct intel_engine_cs *ring = &dev_priv->ring[i];
>> -            struct intel_context *dctx = ring->default_context;
>>              struct intel_context *lctx = ring->last_context;
>> 
>> -            /* Do a fake switch to the default context */
>> -            if (lctx == dctx)
>> -                    continue;
>> -
>> -            if (!lctx)
>> -                    continue;
>> +            if (lctx) {
>> +                    if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
>> +                            i915_gem_object_ggtt_unpin(lctx-
>> >legacy_hw_ctx.rcs_state);
>> 
>> -            if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
>> -                    WARN_ON(i915_gem_obj_ggtt_pin(dctx-
>> >legacy_hw_ctx.rcs_state,
>> -
>> get_context_alignment(dev), 0));
>> -                    /* Fake a finish/inactive */
>> -                    dctx->legacy_hw_ctx.rcs_state->base.write_domain
>> = 0;
>> -                    dctx->legacy_hw_ctx.rcs_state->active = 0;
>> +                    i915_gem_context_unreference(lctx);
>> +                    ring->last_context = NULL;
>>              }
>> -
>> -            if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
>> -                    i915_gem_object_ggtt_unpin(lctx-
>> >legacy_hw_ctx.rcs_state);
>> -
>> -            i915_gem_context_unreference(lctx);
>> -            i915_gem_context_reference(dctx);
>> -            ring->last_context = dctx;
>>      }
>>  }
>> 
>> @@ -498,10 +481,6 @@ int i915_gem_context_enable(struct
>> drm_i915_private *dev_priv)
>>              ppgtt->enable(ppgtt);
>>      }
>> 
>> -    /* FIXME: We should make this work, even in reset */
>> -    if (i915_reset_in_progress(&dev_priv->gpu_error))
>> -            return 0;
>> -
>>      BUG_ON(!dev_priv->ring[RCS].default_context);
>> 
>>      for_each_ring(ring, dev_priv, i) {
>> @@ -645,7 +624,7 @@ static int do_switch(struct intel_engine_cs *ring,
>>      from = ring->last_context;
>> 
>>      if (USES_FULL_PPGTT(ring->dev)) {
>> -            ret = ppgtt->switch_mm(ppgtt, ring, false);
>> +            ret = ppgtt->switch_mm(ppgtt, ring);
>>              if (ret)
>>                      goto unpin_out;
>>      }
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index 5188936..450c8a9 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -216,19 +216,12 @@ static gen6_gtt_pte_t
>> iris_pte_encode(dma_addr_t addr,
>> 
>>  /* Broadwell Page Directory Pointer Descriptors */  static int
>> gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
>> -                       uint64_t val, bool synchronous)
>> +                       uint64_t val)
>>  {
>> -    struct drm_i915_private *dev_priv = ring->dev->dev_private;
>>      int ret;
>> 
>>      BUG_ON(entry >= 4);
>> 
>> -    if (synchronous) {
>> -            I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
>> -            I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
>> -            return 0;
>> -    }
>> -
>>      ret = intel_ring_begin(ring, 6);
>>      if (ret)
>>              return ret;
>> @@ -245,8 +238,7 @@ static int gen8_write_pdp(struct intel_engine_cs
>> *ring, unsigned entry,  }
>> 
>>  static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
>> -                      struct intel_engine_cs *ring,
>> -                      bool synchronous)
>> +                      struct intel_engine_cs *ring)
>>  {
>>      int i, ret;
>> 
>> @@ -255,7 +247,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt
>> *ppgtt,
>> 
>>      for (i = used_pd - 1; i >= 0; i--) {
>>              dma_addr_t addr = ppgtt->pd_dma_addr[i];
>> -            ret = gen8_write_pdp(ring, i, addr, synchronous);
>> +            ret = gen8_write_pdp(ring, i, addr);
>>              if (ret)
>>                      return ret;
>>      }
>> @@ -724,29 +716,10 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt
>> *ppgtt)  }
>> 
>>  static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>> -                     struct intel_engine_cs *ring,
>> -                     bool synchronous)
>> +                     struct intel_engine_cs *ring)
>>  {
>> -    struct drm_device *dev = ppgtt->base.dev;
>> -    struct drm_i915_private *dev_priv = dev->dev_private;
>>      int ret;
>> 
>> -    /* If we're in reset, we can assume the GPU is sufficiently idle to
>> -     * manually frob these bits. Ideally we could use the ring functions,
>> -     * except our error handling makes it quite difficult (can't use
>> -     * intel_ring_begin, ring->flush, or intel_ring_advance)
>> -     *
>> -     * FIXME: We should try not to special case reset
>> -     */
>> -    if (synchronous ||
>> -        i915_reset_in_progress(&dev_priv->gpu_error)) {
>> -            WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
>> -            I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
>> -            I915_WRITE(RING_PP_DIR_BASE(ring),
>> get_pd_offset(ppgtt));
>> -            POSTING_READ(RING_PP_DIR_BASE(ring));
>> -            return 0;
>> -    }
>> -
>>      /* NB: TLBs must be flushed and invalidated before a switch */
>>      ret = ring->flush(ring, I915_GEM_GPU_DOMAINS,
>> I915_GEM_GPU_DOMAINS);
>>      if (ret)
>> @@ -768,29 +741,10 @@ static int hsw_mm_switch(struct i915_hw_ppgtt
>> *ppgtt,  }
>> 
>>  static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>> -                      struct intel_engine_cs *ring,
>> -                      bool synchronous)
>> +                      struct intel_engine_cs *ring)
>>  {
>> -    struct drm_device *dev = ppgtt->base.dev;
>> -    struct drm_i915_private *dev_priv = dev->dev_private;
>>      int ret;
>> 
>> -    /* If we're in reset, we can assume the GPU is sufficiently idle to
>> -     * manually frob these bits. Ideally we could use the ring functions,
>> -     * except our error handling makes it quite difficult (can't use
>> -     * intel_ring_begin, ring->flush, or intel_ring_advance)
>> -     *
>> -     * FIXME: We should try not to special case reset
>> -     */
>> -    if (synchronous ||
>> -        i915_reset_in_progress(&dev_priv->gpu_error)) {
>> -            WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
>> -            I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
>> -            I915_WRITE(RING_PP_DIR_BASE(ring),
>> get_pd_offset(ppgtt));
>> -            POSTING_READ(RING_PP_DIR_BASE(ring));
>> -            return 0;
>> -    }
>> -
>>      /* NB: TLBs must be flushed and invalidated before a switch */
>>      ret = ring->flush(ring, I915_GEM_GPU_DOMAINS,
>> I915_GEM_GPU_DOMAINS);
>>      if (ret)
>> @@ -819,14 +773,11 @@ static int gen7_mm_switch(struct i915_hw_ppgtt
>> *ppgtt,  }
>> 
>>  static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
>> -                      struct intel_engine_cs *ring,
>> -                      bool synchronous)
>> +                      struct intel_engine_cs *ring)
>>  {
>>      struct drm_device *dev = ppgtt->base.dev;
>>      struct drm_i915_private *dev_priv = dev->dev_private;
>> 
>> -    if (!synchronous)
>> -            return 0;
>> 
>>      I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
>>      I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); @@ -
>> 852,7 +803,7 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>>              if (USES_FULL_PPGTT(dev))
>>                      continue;
>> 
>> -            ret = ppgtt->switch_mm(ppgtt, ring, true);
>> +            ret = ppgtt->switch_mm(ppgtt, ring);
>>              if (ret)
>>                      goto err_out;
>>      }
>> @@ -897,7 +848,7 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt
>> *ppgtt)
>>              if (USES_FULL_PPGTT(dev))
>>                      continue;
>> 
>> -            ret = ppgtt->switch_mm(ppgtt, ring, true);
>> +            ret = ppgtt->switch_mm(ppgtt, ring);
>>              if (ret)
>>                      return ret;
>>      }
>> @@ -926,7 +877,7 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt
>> *ppgtt)
>>      I915_WRITE(GFX_MODE,
>> _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
>> 
>>      for_each_ring(ring, dev_priv, i) {
>> -            int ret = ppgtt->switch_mm(ppgtt, ring, true);
>> +            int ret = ppgtt->switch_mm(ppgtt, ring);
>>              if (ret)
>>                      return ret;
>>      }
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h
>> b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> index 8d6f7c1..bf1e4fc 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> @@ -262,8 +262,7 @@ struct i915_hw_ppgtt {
>> 
>>      int (*enable)(struct i915_hw_ppgtt *ppgtt);
>>      int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
>> -                     struct intel_engine_cs *ring,
>> -                     bool synchronous);
>> +                     struct intel_engine_cs *ring);
>>      void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file
>> *m);  };
>> 
>> --
>> 2.0.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to