[Intel-gfx] [PATCH 3/3] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)

2019-04-19 Thread Chris Wilson
Broadwater and the rest of gen4 do support saving and
reloading context specific registers between contexts, providing isolation
of the basic GPU state (as programmable by userspace). This allows
userspace to assume that the GPU retains their state from one batch to the
next, minimising the amount of state it needs to reload and manually save
across batches.

v2: CONSTANT_BUFFER woes

Running through piglit turned up an interesting issue, a GPU hang inside
the context load. The context image includes the CONSTANT_BUFFER command
that loads an address into an on-GPU buffer, and the context load was
executing that immediately. However, since it was reading from the GTT
there is no guarantee that the GTT retains the same configuration as
when the context was saved, resulting in stray reads and a GPU hang.

Having tried issuing a CONSTANT_BUFFER (to disable the command) from the
ring before saving the context to no avail, we resort to patching out
the instruction inside the context image before loading.

This does impose that gen4 always reissues CONSTANT_BUFFER commands on
each batch, but due to the use of a shared GTT that was and will remain
a requirement.

v3: ECOSPKD to the rescue

Ville found the magic bit in the ECOSPKD to disable saving and restoring
the CONSTANT_BUFFER from the context image, thereby completely avoiding
the GPU hangs from chasing invalid pointers. This appears to be the
default behaviour for gen5, and so we just need to tweak gen4 to match.

Signed-off-by: Chris Wilson 
Cc: Ville Syrjälä 
Cc: Kenneth Graunke 
---
 drivers/gpu/drm/i915/i915_reg.h |  3 +++
 drivers/gpu/drm/i915/intel_engine_cs.c  |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b74824f0b5b1..5815703ac35f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2665,6 +2665,9 @@ enum i915_power_well_id {
 # define MODE_IDLE (1 << 9)
 # define STOP_RING (1 << 8)
 
+#define ECOSPKD _MMIO(0x21d0)
+# define CONSTANT_BUFFER_SR_DISABLE BIT(4)
+
 #define GEN6_GT_MODE   _MMIO(0x20d0)
 #define GEN7_GT_MODE   _MMIO(0x7008)
 #define   GEN6_WIZ_HASHING(hi, lo) (((hi) << 9) | ((lo) << 7))
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index fc8be2fcb4e6..f9db2e0bca12 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -211,6 +211,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
PAGE_SIZE);
case 5:
+   case 4:
/*
 * There is a discrepancy here between the size reported
 * by the register and the size of the context layout
@@ -227,7 +228,6 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 cxt_size * 64,
 cxt_size - 1);
return round_up(cxt_size * 64, PAGE_SIZE);
-   case 4:
case 3:
case 2:
/* For the special day when i810 gets merged. */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2d2e33cd3fae..26b276ed00b3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -832,6 +832,20 @@ static int init_render_ring(struct intel_engine_cs *engine)
 {
struct drm_i915_private *dev_priv = engine->i915;
 
+   /*
+* Disable CONSTANT_BUFFER before it is loaded from the context
+* image. For as it is loaded, it is executed and the stored
+* address may no longer be valid, leading to a GPU hang.
+*
+* This imposes the requirement that userspace reload their
+* CONSTANT_BUFFER on every batch, fortunately a requirement
+* they are already accustomed to from before contexts were
+* enabled.
+*/
+   if (IS_GEN(dev_priv, 4))
+   I915_WRITE(ECOSPKD,
+  _MASKED_BIT_ENABLE(CONSTANT_BUFFER_SR_DISABLE));
+
/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
if (IS_GEN_RANGE(dev_priv, 4, 6))
I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
-- 
2.20.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 3/3] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)

2019-01-28 Thread Chris Wilson
Broadwater and the rest of gen4 do support saving and
reloading context specific registers between contexts, providing isolation
of the basic GPU state (as programmable by userspace). This allows
userspace to assume that the GPU retains their state from one batch to the
next, minimising the amount of state it needs to reload and manually save
across batches.

v2: CONSTANT_BUFFER woes

Running through piglit turned up an interesting issue, a GPU hang inside
the context load. The context image includes the CONSTANT_BUFFER command
that loads an address into an on-GPU buffer, and the context load was
executing that immediately. However, since it was reading from the GTT
there is no guarantee that the GTT retains the same configuration as
when the context was saved, resulting in stray reads and a GPU hang.

Having tried issuing a CONSTANT_BUFFER (to disable the command) from the
ring before saving the context to no avail, we resort to patching out
the instruction inside the context image before loading.

This does impose that gen4 always reissues CONSTANT_BUFFER commands on
each batch, but due to the use of a shared GTT that was and will remain
a requirement.

Signed-off-by: Chris Wilson 
Cc: Ville Syrjälä 
Cc: Kenneth Graunke 
Reviewed-by: Ville Syrjälä  #v1
---
 drivers/gpu/drm/i915/intel_engine_cs.c|  2 +-
 drivers/gpu/drm/i915/intel_gpu_commands.h |  3 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c   | 17 +
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index 148c3e06a2eb..32bd850eec30 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -220,6 +220,7 @@ __intel_engine_context_size(struct drm_i915_private 
*dev_priv, u8 class)
return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
PAGE_SIZE);
case 5:
+   case 4:
/*
 * There is a discrepancy here between the size reported
 * by the register and the size of the context layout
@@ -236,7 +237,6 @@ __intel_engine_context_size(struct drm_i915_private 
*dev_priv, u8 class)
 cxt_size * 64,
 cxt_size - 1);
return round_up(cxt_size * 64, PAGE_SIZE);
-   case 4:
case 3:
case 2:
/* For the special day when i810 gets merged. */
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/intel_gpu_commands.h
index b96a31bc1080..a95bfd922c41 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -265,6 +265,9 @@
 #define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
 
+#define GFX_OP_CONSTANT_BUFFER \
+   (0x3 << 29 | 0x0 << 27 | 0x0 << 24 | 0x2 << 16)
+
 #define MFX_WAIT  ((0x3<<29)|(0x1<<27)|(0x0<<16))
 
 #define COLOR_BLT ((0x2<<29)|(0x40<<22))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 19def67bf1c5..c03d156d59d9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1734,6 +1734,8 @@ static inline int mi_set_context(struct i915_request *rq, 
u32 flags)
len += 2 + (num_rings ? 4*num_rings + 6 : 0);
else if (IS_GEN(i915, 5))
len += 2;
+   else if (IS_GEN(i915, 4))
+   len += 4;
if (flags & MI_FORCE_RESTORE) {
GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
flags &= ~MI_FORCE_RESTORE;
@@ -1770,6 +1772,21 @@ static inline int mi_set_context(struct i915_request 
*rq, u32 flags)
 * this should never take effect and so be a no-op!
 */
*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
+   } else if (IS_GEN(i915, 4)) {
+   /*
+* Disable CONSTANT_BUFFER before it is loaded from the context
+* image. For as it is loaded, it is executed and the stored
+* address may no longer be valid, leading to a GPU hang.
+*
+* This imposes the requirement that userspace reload their
+* CONSTANT_BUFFER on every batch, fortunately a requirement
+* they are already accustomed to from before contexts were
+* enabled.
+*/
+   *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+   *cs++ = 0;
+   *cs++ = i915_ggtt_offset(rq->hw_context->state) + 0x1d4;
+   *cs++ = GFX_OP_CONSTANT_BUFFER; /* inactive */
}
 
if (force_restore) {
-- 
2.20.1

___
Intel-gfx mailing list

Re: [Intel-gfx] [PATCH 3/3] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)

2019-01-12 Thread Chris Wilson
Quoting Ville Syrjälä (2019-01-10 16:03:21)
> After a bit of digging I found a few more potentially related
> tidbits:
> 
> ECOSPKD[4] Constant Buffer Save/Restore Disable [DevBW-C1+]

Does the trick for DevCL. I see it is still listed for g4x, so it seems safe
enough to keep for IS_GEN(4); it would seem to be mere good fortune
that it did fail for DevCL, highlighting the CONSTANT_BUFFER issue.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/3] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)

2019-01-10 Thread Chris Wilson
Quoting Ville Syrjälä (2019-01-10 16:03:21)
> On Thu, Jan 10, 2019 at 10:38:07AM +0000, Chris Wilson wrote:
> > Broadwater and the rest of gen4  do support being able to saving and
> > reloading context specific registers between contexts, providing isolation
> > of the basic GPU state (as programmable by userspace). This allows
> > userspace to assume that the GPU retains their state from one batch to the
> > next, minimising the amount of state it needs to reload and manually save
> > across batches.
> > 
> > v2: CONSTANT_BUFFER woes
> > 
> > Running through piglit turned up an interesting issue, a GPU hang inside
> > the context load. The context image includes the CONSTANT_BUFFER command
> > that loads an address into a on-gpu buffer, and the context load was
> > executing that immediately. However, since it was reading from the GTT
> > there is no guarantee that the GTT retains the same configuration as
> > when the context was saved, resulting in stray reads and a GPU hang.
> > 
> > Having tried issuing a CONSTANT_BUFFER (to disable the command) from the
> > ring before saving the context to no avail, we resort to patching out
> > the instruction inside the context image before loading.
> > 
> > This does impose that gen4 always reissues CONSTANT_BUFFER commands on
> > each batch, but due to the use of a shared GTT that was and will remain
> > a requirement.
> > 
> > Signed-off-by: Chris Wilson 
> > Cc: Ville Syrjälä 
> > Cc: Kenneth Graunke 
> > Reviewed-by: Ville Syrjälä  #v1
> > ---
> >  drivers/gpu/drm/i915/intel_engine_cs.c|  2 +-
> >  drivers/gpu/drm/i915/intel_gpu_commands.h |  3 +++
> >  drivers/gpu/drm/i915/intel_ringbuffer.c   | 17 +
> >  3 files changed, 21 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
> > b/drivers/gpu/drm/i915/intel_engine_cs.c
> > index f89b8f199e3f..88109e0de051 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> > @@ -219,6 +219,7 @@ __intel_engine_context_size(struct drm_i915_private 
> > *dev_priv, u8 class)
> >   return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
> >   PAGE_SIZE);
> >   case 5:
> > + case 4:
> >   /*
> >* There is a discrepancy here between the size 
> > reported
> >* by the register and the size of the context layout
> > @@ -235,7 +236,6 @@ __intel_engine_context_size(struct drm_i915_private 
> > *dev_priv, u8 class)
> >cxt_size * 64,
> >cxt_size - 1);
> >   return round_up(cxt_size * 64, PAGE_SIZE);
> > - case 4:
> >   case 3:
> >   case 2:
> >   /* For the special day when i810 gets merged. */
> > diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h 
> > b/drivers/gpu/drm/i915/intel_gpu_commands.h
> > index 105e2a9e874a..00c0175c37ed 100644
> > --- a/drivers/gpu/drm/i915/intel_gpu_commands.h
> > +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
> > @@ -266,6 +266,9 @@
> >  #define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
> >   ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
> >  
> > +#define GFX_OP_CONSTANT_BUFFER \
> > + (0x3 << 29 | 0x0 << 27 | 0x0 << 24 | 0x2 << 16)
> > +
> >  #define MFX_WAIT  ((0x3<<29)|(0x1<<27)|(0x0<<16))
> >  
> >  #define COLOR_BLT ((0x2<<29)|(0x40<<22))
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> > b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index 889f3de79dd0..21bd71cf2e94 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -1632,6 +1632,8 @@ static inline int mi_set_context(struct i915_request 
> > *rq, u32 flags)
> >   len += 2 + (num_rings ? 4*num_rings + 6 : 0);
> >   else if (IS_GEN(i915, 5))
> >   len += 2;
> > + else if (IS_GEN(i915, 4))
> > + len += 4;
> >   if (flags & MI_FORCE_RESTORE) {
> >   GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
> >   flags &= ~MI_FORCE_RESTORE;
> > @@ -1668,6 +1670,21 @@ static inline int mi_set_context(struct i915_request 
> > *rq, u32 flags)
> >* this should never take effect and so be a no-op!
> >*/
> >   *cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
> > + } else if (IS_GEN(i915, 4)) {
> > + /*
> > +  * Disable CONSTANT_BUFFER before it is loaded from the 
> > context
> > +  * image. For as it is loaded, it is executed and the stored
> > +  * address may no longer be valid, leading to a GPU hang.
> > +  *
> > +  * This imposes the requirement that userspace reload their
> > +  * CONSTANT_BUFFER on every batch, fortunately a requirement
> > +  * they are already accustomed to 

Re: [Intel-gfx] [PATCH 3/3] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)

2019-01-10 Thread Ville Syrjälä
On Thu, Jan 10, 2019 at 10:38:07AM +0000, Chris Wilson wrote:
> Broadwater and the rest of gen4  do support being able to saving and
> reloading context specific registers between contexts, providing isolation
> of the basic GPU state (as programmable by userspace). This allows
> userspace to assume that the GPU retains their state from one batch to the
> next, minimising the amount of state it needs to reload and manually save
> across batches.
> 
> v2: CONSTANT_BUFFER woes
> 
> Running through piglit turned up an interesting issue, a GPU hang inside
> the context load. The context image includes the CONSTANT_BUFFER command
> that loads an address into a on-gpu buffer, and the context load was
> executing that immediately. However, since it was reading from the GTT
> there is no guarantee that the GTT retains the same configuration as
> when the context was saved, resulting in stray reads and a GPU hang.
> 
> Having tried issuing a CONSTANT_BUFFER (to disable the command) from the
> ring before saving the context to no avail, we resort to patching out
> the instruction inside the context image before loading.
> 
> This does impose that gen4 always reissues CONSTANT_BUFFER commands on
> each batch, but due to the use of a shared GTT that was and will remain
> a requirement.
> 
> Signed-off-by: Chris Wilson 
> Cc: Ville Syrjälä 
> Cc: Kenneth Graunke 
> Reviewed-by: Ville Syrjälä  #v1
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c|  2 +-
>  drivers/gpu/drm/i915/intel_gpu_commands.h |  3 +++
>  drivers/gpu/drm/i915/intel_ringbuffer.c   | 17 +
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/intel_engine_cs.c
> index f89b8f199e3f..88109e0de051 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -219,6 +219,7 @@ __intel_engine_context_size(struct drm_i915_private 
> *dev_priv, u8 class)
>   return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
>   PAGE_SIZE);
>   case 5:
> + case 4:
>   /*
>* There is a discrepancy here between the size reported
>* by the register and the size of the context layout
> @@ -235,7 +236,6 @@ __intel_engine_context_size(struct drm_i915_private 
> *dev_priv, u8 class)
>cxt_size * 64,
>cxt_size - 1);
>   return round_up(cxt_size * 64, PAGE_SIZE);
> - case 4:
>   case 3:
>   case 2:
>   /* For the special day when i810 gets merged. */
> diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h 
> b/drivers/gpu/drm/i915/intel_gpu_commands.h
> index 105e2a9e874a..00c0175c37ed 100644
> --- a/drivers/gpu/drm/i915/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
> @@ -266,6 +266,9 @@
>  #define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
>   ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
>  
> +#define GFX_OP_CONSTANT_BUFFER \
> + (0x3 << 29 | 0x0 << 27 | 0x0 << 24 | 0x2 << 16)
> +
>  #define MFX_WAIT  ((0x3<<29)|(0x1<<27)|(0x0<<16))
>  
>  #define COLOR_BLT ((0x2<<29)|(0x40<<22))
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 889f3de79dd0..21bd71cf2e94 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1632,6 +1632,8 @@ static inline int mi_set_context(struct i915_request 
> *rq, u32 flags)
>   len += 2 + (num_rings ? 4*num_rings + 6 : 0);
>   else if (IS_GEN(i915, 5))
>   len += 2;
> + else if (IS_GEN(i915, 4))
> + len += 4;
>   if (flags & MI_FORCE_RESTORE) {
>   GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
>   flags &= ~MI_FORCE_RESTORE;
> @@ -1668,6 +1670,21 @@ static inline int mi_set_context(struct i915_request 
> *rq, u32 flags)
>* this should never take effect and so be a no-op!
>*/
>   *cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
> + } else if (IS_GEN(i915, 4)) {
> + /*
> +  * Disable CONSTANT_BUFFER before it is loaded from the context
> +  * image. For as it is loaded, it is executed and the stored
> +  * address may no longer be valid, leading to a GPU hang.
> +  *
> +  * This imposes the requirement that userspace reload their
> +  * CONSTANT_BUFFER on every batch, fortunately a requirement
> +  * they are already accustomed to from before contexts were
> +  * enabled.
> +  */
> + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> + *cs++ = 0;
> + *cs++ = i915_ggtt_offset(rq->hw_context->state) + 0x1d4;

Is that offset 

[Intel-gfx] [PATCH 3/3] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)

2019-01-10 Thread Chris Wilson
Broadwater and the rest of gen4 do support saving and
reloading context specific registers between contexts, providing isolation
of the basic GPU state (as programmable by userspace). This allows
userspace to assume that the GPU retains their state from one batch to the
next, minimising the amount of state it needs to reload and manually save
across batches.

v2: CONSTANT_BUFFER woes

Running through piglit turned up an interesting issue, a GPU hang inside
the context load. The context image includes the CONSTANT_BUFFER command
that loads an address into an on-GPU buffer, and the context load was
executing that immediately. However, since it was reading from the GTT
there is no guarantee that the GTT retains the same configuration as
when the context was saved, resulting in stray reads and a GPU hang.

Having tried issuing a CONSTANT_BUFFER (to disable the command) from the
ring before saving the context to no avail, we resort to patching out
the instruction inside the context image before loading.

This does impose that gen4 always reissues CONSTANT_BUFFER commands on
each batch, but due to the use of a shared GTT that was and will remain
a requirement.

Signed-off-by: Chris Wilson 
Cc: Ville Syrjälä 
Cc: Kenneth Graunke 
Reviewed-by: Ville Syrjälä  #v1
---
 drivers/gpu/drm/i915/intel_engine_cs.c|  2 +-
 drivers/gpu/drm/i915/intel_gpu_commands.h |  3 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c   | 17 +
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index f89b8f199e3f..88109e0de051 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -219,6 +219,7 @@ __intel_engine_context_size(struct drm_i915_private 
*dev_priv, u8 class)
return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
PAGE_SIZE);
case 5:
+   case 4:
/*
 * There is a discrepancy here between the size reported
 * by the register and the size of the context layout
@@ -235,7 +236,6 @@ __intel_engine_context_size(struct drm_i915_private 
*dev_priv, u8 class)
 cxt_size * 64,
 cxt_size - 1);
return round_up(cxt_size * 64, PAGE_SIZE);
-   case 4:
case 3:
case 2:
/* For the special day when i810 gets merged. */
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/intel_gpu_commands.h
index 105e2a9e874a..00c0175c37ed 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -266,6 +266,9 @@
 #define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
 
+#define GFX_OP_CONSTANT_BUFFER \
+   (0x3 << 29 | 0x0 << 27 | 0x0 << 24 | 0x2 << 16)
+
 #define MFX_WAIT  ((0x3<<29)|(0x1<<27)|(0x0<<16))
 
 #define COLOR_BLT ((0x2<<29)|(0x40<<22))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 889f3de79dd0..21bd71cf2e94 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1632,6 +1632,8 @@ static inline int mi_set_context(struct i915_request *rq, 
u32 flags)
len += 2 + (num_rings ? 4*num_rings + 6 : 0);
else if (IS_GEN(i915, 5))
len += 2;
+   else if (IS_GEN(i915, 4))
+   len += 4;
if (flags & MI_FORCE_RESTORE) {
GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
flags &= ~MI_FORCE_RESTORE;
@@ -1668,6 +1670,21 @@ static inline int mi_set_context(struct i915_request 
*rq, u32 flags)
 * this should never take effect and so be a no-op!
 */
*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
+   } else if (IS_GEN(i915, 4)) {
+   /*
+* Disable CONSTANT_BUFFER before it is loaded from the context
+* image. For as it is loaded, it is executed and the stored
+* address may no longer be valid, leading to a GPU hang.
+*
+* This imposes the requirement that userspace reload their
+* CONSTANT_BUFFER on every batch, fortunately a requirement
+* they are already accustomed to from before contexts were
+* enabled.
+*/
+   *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+   *cs++ = 0;
+   *cs++ = i915_ggtt_offset(rq->hw_context->state) + 0x1d4;
+   *cs++ = GFX_OP_CONSTANT_BUFFER; /* inactive */
}
 
if (force_restore) {
-- 
2.20.1

___
Intel-gfx mailing list