Re: [Mesa-dev] [PATCH] Remove 3D registers from compute command stream

2012-08-20 Thread Tom Stellard
On Mon, Aug 13, 2012 at 09:05:29PM +0100, archibald wrote:
 Hi list,
 
 Here is my attempt at solving the task Remove 3D registers from
 compute
 command stream on http://dri.freedesktop.org/wiki/R600ToDo. It's my
 first attempt at a patch for mesa, so I'd appreciate any comments or
 advice that people might have.
 
 I don't have a Cayman card, so I'm not able to test on that, so that
 part
 is officially untested.
 
 I ran the opencl-example programs to test the opencl aspect and
 there was
 no difference in the number of passed and failed tests (67:4) before
 and
 after the patch. OpenArena and my desktop session ran fine
 afterwards, but
 I'm having `fun' trying to get piglit to behave so I couldn't do a
 full regression test.
 
 Thanks,
 Archibald

The non-Cayman parts of this patch have been committed as
59361d76a5b0b6b77d6e6bc976e02df2e8df9ec3.  I wasn't able to test this
patch thoroughly on Cayman due to hanging piglit tests, so I dropped
that part of the patch.  I'll take a look at the cayman changes again
once compute support is more mature.

Thanks for the patch!

-Tom

 diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
 b/src/gallium/drivers/r600/evergreen_compute.c
 index 0d6eb4e..acf91ba 100644
 --- a/src/gallium/drivers/r600/evergreen_compute.c
 +++ b/src/gallium/drivers/r600/evergreen_compute.c
 @@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, 
 const uint *block_layout,
   struct evergreen_compute_resource *resources =
   ctx-cs_shader_state.shader-resources;
  
 - /* Initialize all the registers common to both 3D and compute.  Some
 -  * 3D only register will be initialized by this atom as well, but
 -  * this is OK for now.
 -  *
 -  * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in
 -  * evergreen_state.c for the list of registers that are intialized by
 -  * the start_cs_cmd atom.
 -  */
 - r600_emit_atom(ctx, ctx-start_cs_cmd.atom);
 -
 - /* Initialize all the compute specific registers.
 + /* Initialize all the compute-related registers.
*
* See evergreen_init_atom_start_compute_cs() in this file for the list
 -  * of registers initialized by the start_compuet_cs_cmd atom.
 +  * of registers initialized by the start_compute_cs_cmd atom.
*/
   r600_emit_atom(ctx, ctx-start_compute_cs_cmd.atom);
  
 @@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct 
 r600_context *ctx)
   int num_threads;
   int num_stack_entries;
  
 - /* We aren't passing the EMIT_EARLY flag as the third argument
 -  * because we will be emitting this atom manually in order to
 -  * ensure it gets emitted after the start_cs_cmd atom.
 + /* since all required registers are initialised in the
 +  * start_compute_cs_cmd atom, we can EMIT_EARLY here.
*/
 - r600_init_command_buffer(cb, 256, 0);
 + r600_init_command_buffer(cb, 256, EMIT_EARLY);
   cb-pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
  
   switch (ctx-family) {
 @@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct 
 r600_context *ctx)
   }
  
   /* Config Registers */
 + evergreen_init_common_regs(cb, ctx-chip_class
 + , ctx-family, ctx-screen-info.drm_minor);
  
   /* The primitive type always needs to be POINTLIST for compute. */
   r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE,
 diff --git a/src/gallium/drivers/r600/evergreen_state.c 
 b/src/gallium/drivers/r600/evergreen_state.c
 index 67ae7d3..addc36a 100644
 --- a/src/gallium/drivers/r600/evergreen_state.c
 +++ b/src/gallium/drivers/r600/evergreen_state.c
 @@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct 
 r600_context *rctx)
   r600_store_value(cb, 0x8000);
   r600_store_value(cb, 0x8000);
  
 + cayman_init_common_regs(cb);
 +
   r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
   r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */
   /* always set the temp clauses */
   r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* 
 R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
  
 - r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 
 2);
 - r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
 - r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
 -
 - r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1  
 8));
 -
 - r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
 -
   r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
   r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
   r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
 @@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct 
 r600_context *rctx)
   r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
 

Re: [Mesa-dev] [PATCH] Remove 3D registers from compute command stream

2012-08-16 Thread Tom Stellard
Hi,

In the future, could you use git format-patch to send patches?

Overall, I think this looks OK, I just need to test it out a little bit.

-Tom

On Mon, Aug 13, 2012 at 09:05:29PM +0100, archibald wrote:
 diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
 b/src/gallium/drivers/r600/evergreen_compute.c
 index 0d6eb4e..acf91ba 100644
 --- a/src/gallium/drivers/r600/evergreen_compute.c
 +++ b/src/gallium/drivers/r600/evergreen_compute.c
 @@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, 
 const uint *block_layout,
   struct evergreen_compute_resource *resources =
   ctx-cs_shader_state.shader-resources;
  
 - /* Initialize all the registers common to both 3D and compute.  Some
 -  * 3D only register will be initialized by this atom as well, but
 -  * this is OK for now.
 -  *
 -  * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in
 -  * evergreen_state.c for the list of registers that are intialized by
 -  * the start_cs_cmd atom.
 -  */
 - r600_emit_atom(ctx, ctx-start_cs_cmd.atom);
 -
 - /* Initialize all the compute specific registers.
 + /* Initialize all the compute-related registers.
*
* See evergreen_init_atom_start_compute_cs() in this file for the list
 -  * of registers initialized by the start_compuet_cs_cmd atom.
 +  * of registers initialized by the start_compute_cs_cmd atom.
*/
   r600_emit_atom(ctx, ctx-start_compute_cs_cmd.atom);
  
 @@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct 
 r600_context *ctx)
   int num_threads;
   int num_stack_entries;
  
 - /* We aren't passing the EMIT_EARLY flag as the third argument
 -  * because we will be emitting this atom manually in order to
 -  * ensure it gets emitted after the start_cs_cmd atom.
 + /* since all required registers are initialised in the
 +  * start_compute_cs_cmd atom, we can EMIT_EARLY here.
*/
 - r600_init_command_buffer(cb, 256, 0);
 + r600_init_command_buffer(cb, 256, EMIT_EARLY);
   cb-pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
  
   switch (ctx-family) {
 @@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct 
 r600_context *ctx)
   }
  
   /* Config Registers */
 + evergreen_init_common_regs(cb, ctx-chip_class
 + , ctx-family, ctx-screen-info.drm_minor);
  
   /* The primitive type always needs to be POINTLIST for compute. */
   r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE,
 diff --git a/src/gallium/drivers/r600/evergreen_state.c 
 b/src/gallium/drivers/r600/evergreen_state.c
 index 67ae7d3..addc36a 100644
 --- a/src/gallium/drivers/r600/evergreen_state.c
 +++ b/src/gallium/drivers/r600/evergreen_state.c
 @@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct 
 r600_context *rctx)
   r600_store_value(cb, 0x8000);
   r600_store_value(cb, 0x8000);
  
 + cayman_init_common_regs(cb);
 +
   r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
   r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */
   /* always set the temp clauses */
   r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* 
 R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
  
 - r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 
 2);
 - r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
 - r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
 -
 - r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1  
 8));
 -
 - r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
 -
   r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
   r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
   r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
 @@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct 
 r600_context *rctx)
   r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
   r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
  
 - r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
 - r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
 - r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
 -
   r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
   r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
   r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
  
   r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3  1) | 1);
  
 + r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
 +
 + r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
 + r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
 + r600_store_value(cb, 0x3F80); /* R_02802C_DB_DEPTH_CLEAR */
 +
 + r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
 +
 + 

Re: [Mesa-dev] [PATCH] Remove 3D registers from compute command stream

2012-08-16 Thread Tom Stellard
On Wed, Aug 15, 2012 at 12:30:38PM -0400, Matt Harvey wrote:
 Hey, I'd be interested in testing this to make sure it works, but I have a
 HD4200 running r600g, and I don't think that card has opencl support at the
 moment. Do you know if that would still be useful for testing, and what I
 would use for testing? Also, I was having trouble getting the proper
 libraries installed for building from source last weekend, so it might take
 me until after next weekend to get anything tested.
 Does the mailing list think that would be useful?


Unfortunately, there is no compute support yet for HD4200, so there is not much
to test for this patch.  However, it is always useful to have people do piglit
runs on various cards to identify regressions.  If you need help setting up
piglit, or mesa, the best thing to do is to stop by #radeon on irc.freenode.net
and ask questions.

-Tom
 
 Matt
 
 On Mon, Aug 13, 2012 at 4:05 PM, archibald archib...@ethernull.org wrote:
 
  Hi list,
 
  Here is my attempt at solving the task Remove 3D registers from compute
  command stream on 
  http://dri.freedesktop.org/wiki/R600ToDo.
  It's my
  first attempt at a patch for mesa, so I'd appreciate any comments or
  advice that people might have.
 
  I don't have a Cayman card, so I'm not able to test on that, so that part
  is officially untested.
 
  I ran the opencl-example programs to test the opencl aspect and there was
  no difference in the number of passed and failed tests (67:4) before and
  after the patch. OpenArena and my desktop session ran fine afterwards, but
  I'm having `fun' trying to get piglit to behave so I couldn't do a
  full regression test.
 
  Thanks,
  Archibald
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
 

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Remove 3D registers from compute command stream

2012-08-15 Thread Matt Harvey
Hey, I'd be interested in testing this to make sure it works, but I have a
HD4200 running r600g, and I don't think that card has opencl support at the
moment. Do you know if that would still be useful for testing, and what I
would use for testing? Also, I was having trouble getting the proper
libraries installed for building from source last weekend, so it might take
me until after next weekend to get anything tested.
Does the mailing list think that would be useful?

Matt

On Mon, Aug 13, 2012 at 4:05 PM, archibald archib...@ethernull.org wrote:

 Hi list,

 Here is my attempt at solving the task Remove 3D registers from compute
 command stream on 
 http://dri.freedesktop.org/wiki/R600ToDo.
 It's my
 first attempt at a patch for mesa, so I'd appreciate any comments or
 advice that people might have.

 I don't have a Cayman card, so I'm not able to test on that, so that part
 is officially untested.

 I ran the opencl-example programs to test the opencl aspect and there was
 no difference in the number of passed and failed tests (67:4) before and
 after the patch. OpenArena and my desktop session ran fine afterwards, but
 I'm having `fun' trying to get piglit to behave so I couldn't do a
 full regression test.

 Thanks,
 Archibald
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Remove 3D registers from compute command stream

2012-08-14 Thread archibald

Hi list,

Here is my attempt at solving the task "Remove 3D registers from compute
command stream" on http://dri.freedesktop.org/wiki/R600ToDo. It's my
first attempt at a patch for mesa, so I'd appreciate any comments or
advice that people might have.

I don't have a Cayman card, so I'm not able to test on that, so that part
is officially untested.

I ran the opencl-example programs to test the opencl aspect and there was
no difference in the number of passed and failed tests (67:4) before and
after the patch. OpenArena and my desktop session ran fine afterwards, but
I'm having `fun' trying to get piglit to behave so I couldn't do a
full regression test.

Thanks,
Archibald

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 0d6eb4e..acf91ba 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 	struct evergreen_compute_resource *resources =
 	ctx->cs_shader_state.shader->resources;
 
-	/* Initialize all the registers common to both 3D and compute.  Some
-	 * 3D only register will be initialized by this atom as well, but
-	 * this is OK for now.
-	 *
-	 * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in
-	 * evergreen_state.c for the list of registers that are intialized by
-	 * the start_cs_cmd atom.
-	 */
-	r600_emit_atom(ctx, ctx-start_cs_cmd.atom);
-
-	/* Initialize all the compute specific registers.
+	/* Initialize all the compute-related registers.
 	 *
 	 * See evergreen_init_atom_start_compute_cs() in this file for the list
-	 * of registers initialized by the start_compuet_cs_cmd atom.
+	 * of registers initialized by the start_compute_cs_cmd atom.
 	 */
 	r600_emit_atom(ctx, ctx-start_compute_cs_cmd.atom);
 
@@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
 	int num_threads;
 	int num_stack_entries;
 
-	/* We aren't passing the EMIT_EARLY flag as the third argument
-	 * because we will be emitting this atom manually in order to
-	 * ensure it gets emitted after the start_cs_cmd atom.
+	/* since all required registers are initialised in the
+	 * start_compute_cs_cmd atom, we can EMIT_EARLY here.
 	 */
-	r600_init_command_buffer(cb, 256, 0);
+	r600_init_command_buffer(cb, 256, EMIT_EARLY);
 	cb->pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
 
 	switch (ctx->family) {
@@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
 	}
 
 	/* Config Registers */
+	evergreen_init_common_regs(cb, ctx->chip_class
+			, ctx->family, ctx->screen->info.drm_minor);
 
 	/* The primitive type always needs to be POINTLIST for compute. */
 	r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 67ae7d3..addc36a 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, 0x8000);
 	r600_store_value(cb, 0x8000);
 
+	cayman_init_common_regs(cb);
+
 	r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
 	r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */
 	/* always set the temp clauses */
 	r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
 
-	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
-	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
-	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
-
-	r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
-
-	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
-
 	r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
 	r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
 	r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
@@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
 	r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
 
-	r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
-	r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
-	r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
-
 	r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
 	r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
 	r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
 
 	r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
 
+	r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
+
+	r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
+	r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
+	r600_store_value(cb, 0x3F80); /* R_02802C_DB_DEPTH_CLEAR */
+
+