Hi list,

Here is my attempt at solving the task "Remove 3D registers from compute
command stream" on http://dri.freedesktop.org/wiki/R600ToDo. It's my
first attempt at a patch for mesa, so I'd appreciate any comments or
advice that people might have.

I don't have a Cayman card, so I couldn't test the Cayman code path; that
part of the patch is untested.

I ran the opencl-example programs to test the OpenCL side, and the number
of passing and failing tests (67:4) was the same before and after the
patch. OpenArena and my desktop session ran fine afterwards, but I'm
having "fun" trying to get piglit to behave, so I couldn't do a full
regression test.

Thanks,
Archibald
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 0d6eb4e..acf91ba 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -325,20 +325,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 	struct evergreen_compute_resource *resources =
 					ctx->cs_shader_state.shader->resources;
 
-	/* Initialize all the registers common to both 3D and compute.  Some
-	 * 3D only register will be initialized by this atom as well, but
-	 * this is OK for now.
-	 *
-	 * See evergreen_init_atom_start_cs() or cayman_init_atom_start_cs() in
-	 * evergreen_state.c for the list of registers that are intialized by
-	 * the start_cs_cmd atom.
-	 */
-	r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
-
-	/* Initialize all the compute specific registers.
+	/* Initialize all the compute-related registers.
 	 *
 	 * See evergreen_init_atom_start_compute_cs() in this file for the list
-	 * of registers initialized by the start_compuet_cs_cmd atom.
+	 * of registers initialized by the start_compute_cs_cmd atom.
 	 */
 	r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom);
 
@@ -590,11 +580,10 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
 	int num_threads;
 	int num_stack_entries;
 
-	/* We aren't passing the EMIT_EARLY flag as the third argument
-	 * because we will be emitting this atom manually in order to
-	 * ensure it gets emitted after the start_cs_cmd atom.
+	/* since all required registers are initialised in the
+	 * start_compute_cs_cmd atom, we can EMIT_EARLY here.
 	 */
-	r600_init_command_buffer(cb, 256, 0);
+	r600_init_command_buffer(cb, 256, EMIT_EARLY);
 	cb->pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
 
 	switch (ctx->family) {
@@ -643,6 +632,8 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
 	}
 
 	/* Config Registers */
+	evergreen_init_common_regs(cb, ctx->chip_class
+			, ctx->family, ctx->screen->info.drm_minor);
 
 	/* The primitive type always needs to be POINTLIST for compute. */
 	r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 67ae7d3..addc36a 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1901,19 +1901,13 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, 0x80000000);
 	r600_store_value(cb, 0x80000000);
 
+	cayman_init_common_regs(cb);
+
 	r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
 	r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */
 	/* always set the temp clauses */
 	r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
 
-	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
-	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
-	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
-
-	r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
-
-	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
-
 	r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
 	r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
 	r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
@@ -1929,16 +1923,77 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
 	r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
 
-	r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
-	r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
-	r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
-
 	r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
 	r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
 	r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
 
 	r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
 
+	r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
+
+	r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
+	r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
+	r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
+
+	r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
+
+	r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
+	r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
+
+	r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
+	r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
+
+	r600_store_context_reg_seq(cb, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+	r600_store_value(cb, 0x00000400); /* CM_R_028BDC_PA_SC_LINE_CNTL */
+	r600_store_value(cb, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+
+	r600_store_context_reg_seq(cb, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+	r600_store_value(cb, 0x3F800000); /* CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
+	r600_store_value(cb, 0x3F800000); /* CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
+	r600_store_value(cb, 0x3F800000); /* CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
+	r600_store_value(cb, 0x3F800000); /* CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
+
+	r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
+	r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
+	r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
+
+	r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
+	r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
+	r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
+
+	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
+
+	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
+	if (rctx->screen->has_streamout) {
+		r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+	}
+
+	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
+	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
+}
+
+void cayman_init_common_regs(struct r600_command_buffer *cb)
+{
+	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
+	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
+	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
+
+	r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
+
+	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
+
+	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
+	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
+	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
+
+	r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
+
+	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
+
+	r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
+	r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
+	r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
+
 	r600_store_context_reg(cb, CM_R_028AA8_IA_MULTI_VGT_PARAM, S_028AA8_SWITCH_ON_EOP(1) | S_028AA8_PARTIAL_VS_WAVE_ON(1) | S_028AA8_PRIMGROUP_SIZE(63));
 
 	r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
@@ -1987,75 +2042,43 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */
 	r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */
 
-	r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
-
 	r600_store_context_reg_seq(cb, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
 	r600_store_value(cb, ~0); /* CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 */
 	r600_store_value(cb, ~0); /* CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 */
 
-	r600_store_context_reg_seq(cb, R_028028_DB_STENCIL_CLEAR, 2);
-	r600_store_value(cb, 0); /* R_028028_DB_STENCIL_CLEAR */
-	r600_store_value(cb, 0x3F800000); /* R_02802C_DB_DEPTH_CLEAR */
-
-	r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
-
 	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
 	r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
 	r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
 	r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
 
-	r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
-	r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
-
 	r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
 	r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
 	r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
 
 	r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
-	r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
-	r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
-	r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
-
-	r600_store_context_reg_seq(cb, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
-	r600_store_value(cb, 0x00000400); /* CM_R_028BDC_PA_SC_LINE_CNTL */
-	r600_store_value(cb, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
-
-	r600_store_context_reg_seq(cb, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
-	r600_store_value(cb, 0x3F800000); /* CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
-	r600_store_value(cb, 0x3F800000); /* CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
-	r600_store_value(cb, 0x3F800000); /* CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
-	r600_store_value(cb, 0x3F800000); /* CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
-
-	r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
-	r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
-	r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
 
-	r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
-	r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
-	r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
+	r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
 
 	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
-	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
 
 	r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
-	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
-	if (rctx->screen->has_streamout) {
-		r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
-	}
-
-	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
-	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
 }
 
-void evergreen_init_atom_start_cs(struct r600_context *rctx)
+void evergreen_init_common_regs(struct r600_command_buffer *cb
+	, enum chip_class ctx_chip_class
+	, enum radeon_family ctx_family
+	, int ctx_drm_minor)
 {
-	struct r600_command_buffer *cb = &rctx->start_cs_cmd;
 	int ps_prio;
 	int vs_prio;
 	int gs_prio;
 	int es_prio;
-	int hs_prio, cs_prio, ls_prio;
+
+	int hs_prio;
+	int cs_prio;
+	int ls_prio;
+
 	int num_ps_gprs;
 	int num_vs_gprs;
 	int num_gs_gprs;
@@ -2063,12 +2086,213 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 	int num_hs_gprs;
 	int num_ls_gprs;
 	int num_temp_gprs;
+
+	unsigned tmp;
+
+	if (ctx_chip_class == CAYMAN) {
+		cayman_init_common_regs(cb);
+		return;
+	}
+
+	ps_prio = 0;
+	vs_prio = 1;
+	gs_prio = 2;
+	es_prio = 3;
+	hs_prio = 0;
+	ls_prio = 0;
+	cs_prio = 0;
+
+	switch (ctx_family) {
+	case CHIP_CEDAR:
+	default:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	case CHIP_REDWOOD:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	case CHIP_JUNIPER:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	case CHIP_CYPRESS:
+	case CHIP_HEMLOCK:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	case CHIP_PALM:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	case CHIP_SUMO:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	case CHIP_SUMO2:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	case CHIP_BARTS:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	case CHIP_TURKS:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	case CHIP_CAICOS:
+		num_ps_gprs = 93;
+		num_vs_gprs = 46;
+		num_temp_gprs = 4;
+		num_gs_gprs = 31;
+		num_es_gprs = 31;
+		num_hs_gprs = 23;
+		num_ls_gprs = 23;
+		break;
+	}
+
+	tmp = 0;
+	switch (ctx_family) {
+	case CHIP_CEDAR:
+	case CHIP_PALM:
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
+	case CHIP_CAICOS:
+		break;
+	default:
+		tmp |= S_008C00_VC_ENABLE(1);
+		break;
+	}
+	tmp |= S_008C00_EXPORT_SRC_C(1);
+	tmp |= S_008C00_CS_PRIO(cs_prio);
+	tmp |= S_008C00_LS_PRIO(ls_prio);
+	tmp |= S_008C00_HS_PRIO(hs_prio);
+	tmp |= S_008C00_PS_PRIO(ps_prio);
+	tmp |= S_008C00_VS_PRIO(vs_prio);
+	tmp |= S_008C00_GS_PRIO(gs_prio);
+	tmp |= S_008C00_ES_PRIO(es_prio);
+
+	/* enable dynamic GPR resource management */
+	if (ctx_drm_minor >= 7) {
+		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
+		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
+		/* always set temp clauses */
+		r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
+		r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
+		r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
+		r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
+		r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
+		r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
+					S_028838_PS_GPRS(0x1e) |
+					S_028838_VS_GPRS(0x1e) |
+					S_028838_GS_GPRS(0x1e) |
+					S_028838_ES_GPRS(0x1e) |
+					S_028838_HS_GPRS(0x1e) |
+					S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
+	} else {
+		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
+		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
+
+		tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
+		tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
+		tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
+		r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
+
+		tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
+		tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
+		r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
+
+		tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
+		tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
+		r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
+	}
+
+	r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
+			      S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000));
+
+	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
+
+	r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
+	r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
+	r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
+
+	r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
+
+	r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
+	r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
+	r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
+
+	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
+	r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
+	r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
+	r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
+
+	r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
+
+	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+
+	r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
+
+	return;
+}
+
+void evergreen_init_atom_start_cs(struct r600_context *rctx)
+{
+	struct r600_command_buffer *cb = &rctx->start_cs_cmd;
 	int num_ps_threads;
 	int num_vs_threads;
 	int num_gs_threads;
 	int num_es_threads;
 	int num_hs_threads;
 	int num_ls_threads;
+
 	int num_ps_stack_entries;
 	int num_vs_stack_entries;
 	int num_gs_stack_entries;
@@ -2090,25 +2314,13 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, 0x80000000);
 	r600_store_value(cb, 0x80000000);
 
-	family = rctx->family;
-	ps_prio = 0;
-	vs_prio = 1;
-	gs_prio = 2;
-	es_prio = 3;
-	hs_prio = 0;
-	ls_prio = 0;
-	cs_prio = 0;
+	evergreen_init_common_regs(cb, rctx->chip_class
+			, rctx->family, rctx->screen->info.drm_minor);
 
+	family = rctx->family;
 	switch (family) {
 	case CHIP_CEDAR:
 	default:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 96;
 		num_vs_threads = 16;
 		num_gs_threads = 16;
@@ -2123,13 +2335,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		num_ls_stack_entries = 42;
 		break;
 	case CHIP_REDWOOD:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 128;
 		num_vs_threads = 20;
 		num_gs_threads = 20;
@@ -2144,13 +2349,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		num_ls_stack_entries = 42;
 		break;
 	case CHIP_JUNIPER:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 128;
 		num_vs_threads = 20;
 		num_gs_threads = 20;
@@ -2166,13 +2364,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		break;
 	case CHIP_CYPRESS:
 	case CHIP_HEMLOCK:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 128;
 		num_vs_threads = 20;
 		num_gs_threads = 20;
@@ -2187,13 +2378,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		num_ls_stack_entries = 85;
 		break;
 	case CHIP_PALM:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 96;
 		num_vs_threads = 16;
 		num_gs_threads = 16;
@@ -2208,13 +2392,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		num_ls_stack_entries = 42;
 		break;
 	case CHIP_SUMO:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 96;
 		num_vs_threads = 25;
 		num_gs_threads = 25;
@@ -2229,13 +2406,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		num_ls_stack_entries = 42;
 		break;
 	case CHIP_SUMO2:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 96;
 		num_vs_threads = 25;
 		num_gs_threads = 25;
@@ -2250,13 +2420,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		num_ls_stack_entries = 85;
 		break;
 	case CHIP_BARTS:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 128;
 		num_vs_threads = 20;
 		num_gs_threads = 20;
@@ -2271,13 +2434,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		num_ls_stack_entries = 85;
 		break;
 	case CHIP_TURKS:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 128;
 		num_vs_threads = 20;
 		num_gs_threads = 20;
@@ -2292,13 +2448,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		num_ls_stack_entries = 42;
 		break;
 	case CHIP_CAICOS:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
 		num_ps_threads = 128;
 		num_vs_threads = 10;
 		num_gs_threads = 10;
@@ -2314,66 +2463,11 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 		break;
 	}
 
-	tmp = 0;
-	switch (family) {
-	case CHIP_CEDAR:
-	case CHIP_PALM:
-	case CHIP_SUMO:
-	case CHIP_SUMO2:
-	case CHIP_CAICOS:
-		break;
-	default:
-		tmp |= S_008C00_VC_ENABLE(1);
-		break;
-	}
-	tmp |= S_008C00_EXPORT_SRC_C(1);
-	tmp |= S_008C00_CS_PRIO(cs_prio);
-	tmp |= S_008C00_LS_PRIO(ls_prio);
-	tmp |= S_008C00_HS_PRIO(hs_prio);
-	tmp |= S_008C00_PS_PRIO(ps_prio);
-	tmp |= S_008C00_VS_PRIO(vs_prio);
-	tmp |= S_008C00_GS_PRIO(gs_prio);
-	tmp |= S_008C00_ES_PRIO(es_prio);
-
-	/* enable dynamic GPR resource management */
-	if (rctx->screen->info.drm_minor >= 7) {
-		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
-		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
-		/* always set temp clauses */
-		r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
-		r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
-		r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
-		r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
-		r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
-		r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
-					S_028838_PS_GPRS(0x1e) |
-					S_028838_VS_GPRS(0x1e) |
-					S_028838_GS_GPRS(0x1e) |
-					S_028838_ES_GPRS(0x1e) |
-					S_028838_HS_GPRS(0x1e) |
-					S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
-	} else {
-		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
-		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
-
-		tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
-		tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
-		tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
-		r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
-
-		tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
-		tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
-		r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
-
-		tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
-		tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
-		r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
-	}
-
 	tmp = S_008C18_NUM_PS_THREADS(num_ps_threads);
 	tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads);
 	tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads);
 	tmp |= S_008C18_NUM_ES_THREADS(num_es_threads);
+
 	r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
 	r600_store_value(cb, tmp); /* R_008C18_SQ_THREAD_RESOURCE_MGMT_1 */
 
@@ -2393,14 +2487,9 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 	tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
 	r600_store_value(cb, tmp); /* R_008C28_SQ_STACK_RESOURCE_MGMT_3 */
 
-	r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
-			      S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000));
-
 	r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0);
 	r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4));
 
-	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);
-
 	r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6);
 	r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */
 	r600_store_value(cb, 0); /* R_028904_SQ_GSVS_RING_ITEMSIZE */
@@ -2430,10 +2519,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
 	r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
 
-	r600_store_context_reg_seq(cb, R_028B94_VGT_STRMOUT_CONFIG, 2);
-	r600_store_value(cb, 0); /* R_028B94_VGT_STRMOUT_CONFIG */
-	r600_store_value(cb, 0); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
-
 	r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
 	r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
 	r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
@@ -2484,23 +2569,11 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 
 	r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
 	r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
-	r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
-
-	r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
-	r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */
-	r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */
 
 	r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
 	r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F);
 	r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
 
-	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
-	r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
-	r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
-	r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
-
-	r600_store_context_reg(cb, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
-
 	r600_store_context_reg_seq(cb, R_028C00_PA_SC_LINE_CNTL, 2);
 	r600_store_value(cb, 0x00000400); /* R_028C00_PA_SC_LINE_CNTL */
 	r600_store_value(cb, 0); /* R_028C04_PA_SC_AA_CONFIG */
@@ -2522,11 +2595,8 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
 	r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
 
-	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
-	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
 
-	r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
 	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
 	if (rctx->screen->has_streamout) {
 		r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 0464183..cf2e61e 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -447,6 +447,13 @@ static INLINE void r600_atom_dirty(struct r600_context *rctx, struct r600_atom *
 }
 
 /* evergreen_state.c */
+void cayman_init_common_regs(struct r600_command_buffer *cb);
+
+void evergreen_init_common_regs(struct r600_command_buffer *cb,
+				enum chip_class ctx_chip_class,
+				enum radeon_family ctx_family,
+				int ctx_drm_minor);
+
 void evergreen_init_state_functions(struct r600_context *rctx);
 void evergreen_init_atom_start_cs(struct r600_context *rctx);
 void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to