Re: [Mesa-dev] [PATCH] Remove 3D registers from compute command stream
On Mon, Aug 13, 2012 at 09:05:29PM +0100, archibald wrote: Hi list, Here is my attempt at solving the task Remove 3D registers from compute command stream on http://dri.freedesktop.org/wiki/R600ToDo. It's my first attempt at a patch for mesa, so I'd appreciate any comments or advice that people might have. I don't have a Cayman card, so I'm not able to test on that, so that part is officially untested. I ran the opencl-example programs to test the opencl aspect and there was no difference in the number of passed and failed tests (67:4) before and after the patch. OpenArena and my desktop session ran fine afterwards, but I'm having `fun' trying to get piglit to behave so I couldn't do a full regression test. Thanks, Archibald The non-cayman parts of this patch have been committed as 59361d76a5b0b6b77d6e6bc976e02df2e8df9ec3 I wasn't able to test this patch thoroughly on Cayman due to hanging piglit tests, so I dropped that part of the patch. I'll take a look at the cayman changes again once compute support is more mature. Thanks for the patch! -Tom diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 0d6eb4e..acf91ba 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, struct evergreen_compute_resource *resources = ctx-cs_shader_state.shader-resources; - /* Initialize all the registers common to both 3D and compute. Some - * 3D only register will be initialized by this atom as well, but - * this is OK for now. - * - * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in - * evergreen_state.c for the list of registers that are intialized by - * the start_cs_cmd atom. - */ - r600_emit_atom(ctx, ctx-start_cs_cmd.atom); - - /* Initialize all the compute specific registers. + /* Initialize all the compute-related registers. 
* * See evergreen_init_atom_start_compute_cs() in this file for the list - * of registers initialized by the start_compuet_cs_cmd atom. + * of registers initialized by the start_compute_cs_cmd atom. */ r600_emit_atom(ctx, ctx-start_compute_cs_cmd.atom); @@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) int num_threads; int num_stack_entries; - /* We aren't passing the EMIT_EARLY flag as the third argument - * because we will be emitting this atom manually in order to - * ensure it gets emitted after the start_cs_cmd atom. + /* since all required registers are initialised in the + * start_compute_cs_cmd atom, we can EMIT_EARLY here. */ - r600_init_command_buffer(cb, 256, 0); + r600_init_command_buffer(cb, 256, EMIT_EARLY); cb-pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; switch (ctx-family) { @@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) } /* Config Registers */ + evergreen_init_common_regs(cb, ctx-chip_class + , ctx-family, ctx-screen-info.drm_minor); /* The primitive type always needs to be POINTLIST for compute. 
*/ r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE, diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 67ae7d3..addc36a 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_value(cb, 0x8000); r600_store_value(cb, 0x8000); + cayman_init_common_regs(cb); + r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2); r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */ /* always set the temp clauses */ r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */ - r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2); - r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */ - r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */ - - r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 8)); - - r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0); - r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13); r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */ r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */ @@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
Re: [Mesa-dev] [PATCH] Remove 3D registers from compute command stream
Hi, In the future, could you use git format-patch to send patches. Overall, I think this looks OK, I just need to test it out a little bit. -Tom On Mon, Aug 13, 2012 at 09:05:29PM +0100, archibald wrote: diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 0d6eb4e..acf91ba 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, struct evergreen_compute_resource *resources = ctx-cs_shader_state.shader-resources; - /* Initialize all the registers common to both 3D and compute. Some - * 3D only register will be initialized by this atom as well, but - * this is OK for now. - * - * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in - * evergreen_state.c for the list of registers that are intialized by - * the start_cs_cmd atom. - */ - r600_emit_atom(ctx, ctx-start_cs_cmd.atom); - - /* Initialize all the compute specific registers. + /* Initialize all the compute-related registers. * * See evergreen_init_atom_start_compute_cs() in this file for the list - * of registers initialized by the start_compuet_cs_cmd atom. + * of registers initialized by the start_compute_cs_cmd atom. */ r600_emit_atom(ctx, ctx-start_compute_cs_cmd.atom); @@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) int num_threads; int num_stack_entries; - /* We aren't passing the EMIT_EARLY flag as the third argument - * because we will be emitting this atom manually in order to - * ensure it gets emitted after the start_cs_cmd atom. + /* since all required registers are initialised in the + * start_compute_cs_cmd atom, we can EMIT_EARLY here. 
*/ - r600_init_command_buffer(cb, 256, 0); + r600_init_command_buffer(cb, 256, EMIT_EARLY); cb-pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; switch (ctx-family) { @@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) } /* Config Registers */ + evergreen_init_common_regs(cb, ctx-chip_class + , ctx-family, ctx-screen-info.drm_minor); /* The primitive type always needs to be POINTLIST for compute. */ r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE, diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 67ae7d3..addc36a 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_value(cb, 0x8000); r600_store_value(cb, 0x8000); + cayman_init_common_regs(cb); + r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2); r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */ /* always set the temp clauses */ r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */ - r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2); - r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */ - r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */ - - r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 8)); - - r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0); - r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13); r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */ r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */ @@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */ r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */ - r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2); - r600_store_value(cb, 0); /* 
R_028B94_VGT_STRMOUT_CONFIG */ - r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */ - r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2); r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */ r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */ r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 1) | 1); + r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); + + r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2); + r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */ + r600_store_value(cb, 0x3F80); /* R_02802C_DB_DEPTH_CLEAR */ + + r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0); + +
Re: [Mesa-dev] [PATCH] Remove 3D registers from compute command stream
On Wed, Aug 15, 2012 at 12:30:38PM -0400, Matt Harvey wrote: Hey, I'd be interested in testing this to make sure it works, but I have an HD4200 running r600g, and I don't think that card has opencl support at the moment. Do you know if that would still be useful for testing, and what I would use for testing? Also, I was having trouble getting the proper libraries installed for building from source last weekend, so it might take me until after next weekend to get anything tested. Does the mailing list think that would be useful? Unfortunately, there is no compute support yet for HD4200, so there is not much to test for this patch. However, it is always useful to have people do piglit runs on various cards to identify regressions. If you need help setting up piglit, or mesa, the best thing to do is to stop by #radeon on irc.freenode.net and ask questions. -Tom Matt On Mon, Aug 13, 2012 at 4:05 PM, archibald archib...@ethernull.org wrote: Hi list, Here is my attempt at solving the task Remove 3D registers from compute command stream on http://dri.freedesktop.org/wiki/R600ToDo. It's my first attempt at a patch for mesa, so I'd appreciate any comments or advice that people might have. I don't have a Cayman card, so I'm not able to test on that, so that part is officially untested. I ran the opencl-example programs to test the opencl aspect and there was no difference in the number of passed and failed tests (67:4) before and after the patch. OpenArena and my desktop session ran fine afterwards, but I'm having `fun' trying to get piglit to behave so I couldn't do a full regression test. 
Thanks, Archibald ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Remove 3D registers from compute command stream
Hey, I'd be interested in testing this to make sure it works, but I have an HD4200 running r600g, and I don't think that card has opencl support at the moment. Do you know if that would still be useful for testing, and what I would use for testing? Also, I was having trouble getting the proper libraries installed for building from source last weekend, so it might take me until after next weekend to get anything tested. Does the mailing list think that would be useful? Matt On Mon, Aug 13, 2012 at 4:05 PM, archibald archib...@ethernull.org wrote: Hi list, Here is my attempt at solving the task Remove 3D registers from compute command stream on http://dri.freedesktop.org/wiki/R600ToDo. It's my first attempt at a patch for mesa, so I'd appreciate any comments or advice that people might have. I don't have a Cayman card, so I'm not able to test on that, so that part is officially untested. I ran the opencl-example programs to test the opencl aspect and there was no difference in the number of passed and failed tests (67:4) before and after the patch. OpenArena and my desktop session ran fine afterwards, but I'm having `fun' trying to get piglit to behave so I couldn't do a full regression test. Thanks, Archibald ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] Remove 3D registers from compute command stream
Hi list, Here is my attempt at solving the task Remove 3D registers from compute command stream on http://dri.freedesktop.org/wiki/R600ToDo. It's my first attempt at a patch for mesa, so I'd appreciate any comments or advice that people might have. I don't have a Cayman card, so I'm not able to test on that, so that part is officially untested. I ran the opencl-example programs to test the opencl aspect and there was no difference in the number of passed and failed tests (67:4) before and after the patch. OpenArena and my desktop session ran fine afterwards, but I'm having `fun' trying to get piglit to behave so I couldn't do a full regression test. Thanks, Archibald diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 0d6eb4e..acf91ba 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, struct evergreen_compute_resource *resources = ctx->cs_shader_state.shader->resources; - /* Initialize all the registers common to both 3D and compute. Some - * 3D only register will be initialized by this atom as well, but - * this is OK for now. - * - * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in - * evergreen_state.c for the list of registers that are intialized by - * the start_cs_cmd atom. - */ - r600_emit_atom(ctx, &ctx->start_cs_cmd.atom); - - /* Initialize all the compute specific registers. + /* Initialize all the compute-related registers. * * See evergreen_init_atom_start_compute_cs() in this file for the list - * of registers initialized by the start_compuet_cs_cmd atom. + * of registers initialized by the start_compute_cs_cmd atom. 
*/ r600_emit_atom(ctx, ctx-start_compute_cs_cmd.atom); @@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) int num_threads; int num_stack_entries; - /* We aren't passing the EMIT_EARLY flag as the third argument - * because we will be emitting this atom manually in order to - * ensure it gets emitted after the start_cs_cmd atom. + /* since all required registers are initialised in the + * start_compute_cs_cmd atom, we can EMIT_EARLY here. */ - r600_init_command_buffer(cb, 256, 0); + r600_init_command_buffer(cb, 256, EMIT_EARLY); cb-pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; switch (ctx-family) { @@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) } /* Config Registers */ + evergreen_init_common_regs(cb, ctx-chip_class + , ctx-family, ctx-screen-info.drm_minor); /* The primitive type always needs to be POINTLIST for compute. */ r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE, diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 67ae7d3..addc36a 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_value(cb, 0x8000); r600_store_value(cb, 0x8000); + cayman_init_common_regs(cb); + r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2); r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */ /* always set the temp clauses */ r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */ - r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2); - r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */ - r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */ - - r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 8)); - - r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0); - 
r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13); r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */ r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */ @@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */ r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */ - r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2); - r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */ - r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */ - r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2); r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */ r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */ r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 1) | 1); + r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); + + r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2); + r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */ + r600_store_value(cb, 0x3F80); /* R_02802C_DB_DEPTH_CLEAR */ + +