Hi list,
Here is my attempt at solving the task "Remove 3D registers from
compute
command stream" on http://dri.freedesktop.org/wiki/R600ToDo. It's my
first attempt at a patch for mesa, so I'd appreciate any comments or
advice that people might have.
I don't have a Cayman card, so I'm not able to test on that, so that
part
is officially untested.
I ran the opencl-example programs to test the opencl aspect and there
was
no difference in the number of passed and failed tests (67:4) before
and
after the patch. OpenArena and my desktop session ran fine afterwards,
but
I'm having `fun' trying to get piglit to behave so I couldn't do a
full regression test.
Thanks,
Archibald
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 0d6eb4e..acf91ba 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
struct evergreen_compute_resource *resources =
ctx->cs_shader_state.shader->resources;
- /* Initialize all the registers common to both 3D and compute. Some
- * 3D only register will be initialized by this atom as well, but
- * this is OK for now.
- *
- * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in
- * evergreen_state.c for the list of registers that are intialized by
- * the start_cs_cmd atom.
- */
- r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
-
- /* Initialize all the compute specific registers.
+ /* Initialize all the compute-related registers.
*
* See evergreen_init_atom_start_compute_cs() in this file for the list
- * of registers initialized by the start_compuet_cs_cmd atom.
+ * of registers initialized by the start_compute_cs_cmd atom.
*/
r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom);
@@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
int num_threads;
int num_stack_entries;
- /* We aren't passing the EMIT_EARLY flag as the third argument
- * because we will be emitting this atom manually in order to
- * ensure it gets emitted after the start_cs_cmd atom.
+ /* since all required registers are initialised in the
+ * start_compute_cs_cmd atom, we can EMIT_EARLY here.
*/
- r600_init_command_buffer(cb, 256, 0);
+ r600_init_command_buffer(cb, 256, EMIT_EARLY);
cb->pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
switch (ctx->family) {
@@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
}
/* Config Registers */
+ evergreen_init_common_regs(cb, ctx->chip_class
+ , ctx->family, ctx->screen->info.drm_minor);
/* The primitive type always needs to be POINTLIST for compute. */
r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 67ae7d3..addc36a 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, 0x80000000);
r600_store_value(cb, 0x80000000);
+ cayman_init_common_regs(cb);
+
r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */
/* always set the temp clauses */
r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
- r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
- r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
- r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
-
- r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
-
- r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
-
r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
@@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
- r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
- r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
- r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
-
r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
+ r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
+
+ r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
+ r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
+ r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+
+ r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
+
+ r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
+ r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
+
+ r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
+ r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
+
+ r600_store_context_reg_seq(cb, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+ r600_store_value(cb, 0x00000400); /* CM_R_028BDC_PA_SC_LINE_CNTL */
+ r600_store_value(cb, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+
+ r600_store_context_reg_seq(cb, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+ r600_store_value(cb, 0x3F800000); /* CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
+ r600_store_value(cb, 0x3F800000); /* CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
+ r600_store_value(cb, 0x3F800000); /* CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
+ r600_store_value(cb, 0x3F800000); /* CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
+
+ r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
+ r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
+ r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
+
+ r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
+ r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
+ r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
+
+ r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
+
+ r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
+ if (rctx->screen->has_streamout) {
+ r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+ }
+
+ eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
+ eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
+}
+
+void cayman_init_common_regs(struct r600_command_buffer *cb)
+{
+ r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
+ r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
+ r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
+
+ r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
+
+ r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
+
+ r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
+ r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
+ r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
+
+ r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
+
+ r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
+
+ r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
+ r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
+ r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
+
r600_store_context_reg(cb, CM_R_028AA8_IA_MULTI_VGT_PARAM, S_028AA8_SWITCH_ON_EOP(1) | S_028AA8_PARTIAL_VS_WAVE_ON(1) | S_028AA8_PRIMGROUP_SIZE(63));
r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
@@ -1987,75 +2042,43 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */
r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */
- r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
-
r600_store_context_reg_seq(cb, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
r600_store_value(cb, ~0); /* CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 */
r600_store_value(cb, ~0); /* CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 */
- r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
- r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
- r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
-
- r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
-
r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
- r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
- r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
-
r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
- r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
- r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
- r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
-
- r600_store_context_reg_seq(cb, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
- r600_store_value(cb, 0x00000400); /* CM_R_028BDC_PA_SC_LINE_CNTL */
- r600_store_value(cb, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
-
- r600_store_context_reg_seq(cb, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
- r600_store_value(cb, 0x3F800000); /* CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
- r600_store_value(cb, 0x3F800000); /* CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
- r600_store_value(cb, 0x3F800000); /* CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
- r600_store_value(cb, 0x3F800000); /* CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
-
- r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
- r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
- r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
- r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
- r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
- r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
+ r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
- r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
- r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
- if (rctx->screen->has_streamout) {
- r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
- }
-
- eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
- eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
}
-void evergreen_init_atom_start_cs(struct r600_context *rctx)
+void evergreen_init_common_regs(struct r600_command_buffer *cb
+ , enum chip_class ctx_chip_class
+ , enum radeon_family ctx_family
+ , int ctx_drm_minor)
{
- struct r600_command_buffer *cb = &rctx->start_cs_cmd;
int ps_prio;
int vs_prio;
int gs_prio;
int es_prio;
- int hs_prio, cs_prio, ls_prio;
+
+ int hs_prio;
+ int cs_prio;
+ int ls_prio;
+
int num_ps_gprs;
int num_vs_gprs;
int num_gs_gprs;
@@ -2063,12 +2086,213 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
int num_hs_gprs;
int num_ls_gprs;
int num_temp_gprs;
+
+ unsigned tmp;
+
+ if (ctx_chip_class == CAYMAN) {
+ cayman_init_common_regs(cb);
+ return;
+ }
+
+ ps_prio = 0;
+ vs_prio = 1;
+ gs_prio = 2;
+ es_prio = 3;
+ hs_prio = 0;
+ ls_prio = 0;
+ cs_prio = 0;
+
+ switch (ctx_family) {
+ case CHIP_CEDAR:
+ default:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ case CHIP_REDWOOD:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ case CHIP_JUNIPER:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ case CHIP_CYPRESS:
+ case CHIP_HEMLOCK:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ case CHIP_PALM:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ case CHIP_SUMO:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ case CHIP_SUMO2:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ case CHIP_BARTS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ case CHIP_TURKS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ case CHIP_CAICOS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ break;
+ }
+
+ tmp = 0;
+ switch (ctx_family) {
+ case CHIP_CEDAR:
+ case CHIP_PALM:
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
+ case CHIP_CAICOS:
+ break;
+ default:
+ tmp |= S_008C00_VC_ENABLE(1);
+ break;
+ }
+ tmp |= S_008C00_EXPORT_SRC_C(1);
+ tmp |= S_008C00_CS_PRIO(cs_prio);
+ tmp |= S_008C00_LS_PRIO(ls_prio);
+ tmp |= S_008C00_HS_PRIO(hs_prio);
+ tmp |= S_008C00_PS_PRIO(ps_prio);
+ tmp |= S_008C00_VS_PRIO(vs_prio);
+ tmp |= S_008C00_GS_PRIO(gs_prio);
+ tmp |= S_008C00_ES_PRIO(es_prio);
+
+ /* enable dynamic GPR resource management */
+ if (ctx_drm_minor >= 7) {
+ r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
+ r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
+ /* always set temp clauses */
+ r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
+ r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
+ r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
+ r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
+ r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
+ r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
+ S_028838_PS_GPRS(0x1e) |
+ S_028838_VS_GPRS(0x1e) |
+ S_028838_GS_GPRS(0x1e) |
+ S_028838_ES_GPRS(0x1e) |
+ S_028838_HS_GPRS(0x1e) |
+ S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
+ } else {
+ r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
+ r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
+
+ tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
+ tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
+ tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
+ r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
+
+ tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
+ tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
+ r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
+
+ tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
+ tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
+ r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
+ }
+
+ r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
+ S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000));
+
+ r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
+
+ r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
+ r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
+ r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
+
+ r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
+
+ r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
+ r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
+ r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
+
+ r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
+ r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
+ r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
+ r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
+
+ r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
+
+ r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+ r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+
+ r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
+
+ return;
+}
+
+void evergreen_init_atom_start_cs(struct r600_context *rctx)
+{
+ struct r600_command_buffer *cb = &rctx->start_cs_cmd;
int num_ps_threads;
int num_vs_threads;
int num_gs_threads;
int num_es_threads;
int num_hs_threads;
int num_ls_threads;
+
int num_ps_stack_entries;
int num_vs_stack_entries;
int num_gs_stack_entries;
@@ -2090,25 +2314,13 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, 0x80000000);
r600_store_value(cb, 0x80000000);
- family = rctx->family;
- ps_prio = 0;
- vs_prio = 1;
- gs_prio = 2;
- es_prio = 3;
- hs_prio = 0;
- ls_prio = 0;
- cs_prio = 0;
+ evergreen_init_common_regs(cb, rctx->chip_class
+ , rctx->family, rctx->screen->info.drm_minor);
+ family = rctx->family;
switch (family) {
case CHIP_CEDAR:
default:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 96;
num_vs_threads = 16;
num_gs_threads = 16;
@@ -2123,13 +2335,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
num_ls_stack_entries = 42;
break;
case CHIP_REDWOOD:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
@@ -2144,13 +2349,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
num_ls_stack_entries = 42;
break;
case CHIP_JUNIPER:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
@@ -2166,13 +2364,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
break;
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
@@ -2187,13 +2378,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
num_ls_stack_entries = 85;
break;
case CHIP_PALM:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 96;
num_vs_threads = 16;
num_gs_threads = 16;
@@ -2208,13 +2392,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
num_ls_stack_entries = 42;
break;
case CHIP_SUMO:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 96;
num_vs_threads = 25;
num_gs_threads = 25;
@@ -2229,13 +2406,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
num_ls_stack_entries = 42;
break;
case CHIP_SUMO2:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 96;
num_vs_threads = 25;
num_gs_threads = 25;
@@ -2250,13 +2420,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
num_ls_stack_entries = 85;
break;
case CHIP_BARTS:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
@@ -2271,13 +2434,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
num_ls_stack_entries = 85;
break;
case CHIP_TURKS:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 20;
num_gs_threads = 20;
@@ -2292,13 +2448,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
num_ls_stack_entries = 42;
break;
case CHIP_CAICOS:
- num_ps_gprs = 93;
- num_vs_gprs = 46;
- num_temp_gprs = 4;
- num_gs_gprs = 31;
- num_es_gprs = 31;
- num_hs_gprs = 23;
- num_ls_gprs = 23;
num_ps_threads = 128;
num_vs_threads = 10;
num_gs_threads = 10;
@@ -2314,66 +2463,11 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
break;
}
- tmp = 0;
- switch (family) {
- case CHIP_CEDAR:
- case CHIP_PALM:
- case CHIP_SUMO:
- case CHIP_SUMO2:
- case CHIP_CAICOS:
- break;
- default:
- tmp |= S_008C00_VC_ENABLE(1);
- break;
- }
- tmp |= S_008C00_EXPORT_SRC_C(1);
- tmp |= S_008C00_CS_PRIO(cs_prio);
- tmp |= S_008C00_LS_PRIO(ls_prio);
- tmp |= S_008C00_HS_PRIO(hs_prio);
- tmp |= S_008C00_PS_PRIO(ps_prio);
- tmp |= S_008C00_VS_PRIO(vs_prio);
- tmp |= S_008C00_GS_PRIO(gs_prio);
- tmp |= S_008C00_ES_PRIO(es_prio);
-
- /* enable dynamic GPR resource management */
- if (rctx->screen->info.drm_minor >= 7) {
- r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
- r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
- /* always set temp clauses */
- r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
- r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
- r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
- r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
- r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
- r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
- S_028838_PS_GPRS(0x1e) |
- S_028838_VS_GPRS(0x1e) |
- S_028838_GS_GPRS(0x1e) |
- S_028838_ES_GPRS(0x1e) |
- S_028838_HS_GPRS(0x1e) |
- S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
- } else {
- r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
- r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
-
- tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
- tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
- tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
- r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
-
- tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
- tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
- r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
-
- tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
- tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
- r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
- }
-
tmp = S_008C18_NUM_PS_THREADS(num_ps_threads);
tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads);
tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads);
tmp |= S_008C18_NUM_ES_THREADS(num_es_threads);
+
r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
r600_store_value(cb, tmp); /* R_008C18_SQ_THREAD_RESOURCE_MGMT_1 */
@@ -2393,14 +2487,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
r600_store_value(cb, tmp); /* R_008C28_SQ_STACK_RESOURCE_MGMT_3 */
- r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
- S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000));
-
r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0);
r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4));
- r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
-
r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6);
r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */
r600_store_value(cb, 0); /* R_028904_SQ_GSVS_RING_ITEMSIZE */
@@ -2430,10 +2519,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
- r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
- r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
- r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
-
r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
@@ -2484,23 +2569,11 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
- r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
-
- r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
- r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
- r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
- r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
- r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
- r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
- r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
-
- r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
-
r600_store_context_reg_seq(cb, R_028C00_PA_SC_LINE_CNTL, 2);
r600_store_value(cb, 0x00000400); /* R_028C00_PA_SC_LINE_CNTL */
r600_store_value(cb, 0); /* R_028C04_PA_SC_AA_CONFIG */
@@ -2522,11 +2595,8 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
- r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
- r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
- r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
if (rctx->screen->has_streamout) {
r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 0464183..cf2e61e 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -447,6 +447,13 @@ static INLINE void r600_atom_dirty(struct r600_context *rctx, struct r600_atom *
}
/* evergreen_state.c */
+void cayman_init_common_regs(struct r600_command_buffer *cb);
+
+void evergreen_init_common_regs(struct r600_command_buffer *cb,
+ enum chip_class ctx_chip_class,
+ enum radeon_family ctx_family,
+ int ctx_drm_minor);
+
void evergreen_init_state_functions(struct r600_context *rctx);
void evergreen_init_atom_start_cs(struct r600_context *rctx);
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev