On 11/30/2012 01:24 PM, Eric Anholt wrote:
This can be used for two purposes: Using hand-coded shaders to determine per-instruction timings, or figuring out which shader to optimize in a whole application. On the psychonauts trace, we see: type ID cycles spent % of total ... fs16 543: 339343898369 ( 339.34 Gcycles) 9.2% fs16 521: 532215110990 ( 532.22 Gcycles) 14.4% fs16 524: 1036231987390 (1036.23 Gcycles) 28.0% confirming our previous understanding that fragment shaders are where it's all at. But on GLBenchmark 2.7, we get: fs16 69: 205928219888 ( 205.93 Gcycles) 7.5% fs16 75: 364066413095 ( 364.07 Gcycles) 13.2% vs 87: 1107217698878 (1107.22 Gcycles) 40.3% That's interesting. I should look into that. Note that this doesn't cover the instructions that set up the message to the URB/FB write -- we'd need to convert the MRF usage in these instructions to GRFs so that our offsets/times don't overwrite our shader outputs. --- src/mesa/drivers/dri/i965/brw_context.c | 3 + src/mesa/drivers/dri/i965/brw_context.h | 28 ++++- src/mesa/drivers/dri/i965/brw_defines.h | 20 +++- src/mesa/drivers/dri/i965/brw_eu.h | 6 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 55 +++++++++- src/mesa/drivers/dri/i965/brw_fs.cpp | 101 +++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 7 ++ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 4 + src/mesa/drivers/dri/i965/brw_program.c | 128 ++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 81 ++++++++++++++ src/mesa/drivers/dri/i965/brw_vec4.h | 7 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 4 + src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 10 ++ src/mesa/drivers/dri/i965/brw_vtbl.c | 14 +++ src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 7 ++ src/mesa/drivers/dri/intel/intel_context.c | 6 + src/mesa/drivers/dri/intel/intel_context.h | 1 + 17 files changed, 475 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 4b1b247..5665a3a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ 
b/src/mesa/drivers/dri/i965/brw_context.c @@ -383,6 +383,9 @@ brwCreateContext(int api, brw_fs_alloc_reg_sets(brw); + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + brw_init_shader_time(brw); + return true; } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1abaee3..dc25cab 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -559,14 +559,15 @@ struct brw_vs_prog_data { #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1) #define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t)) #define SURF_INDEX_WM_UBO(u) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + u) - +#define SURF_INDEX_WM_SHADER_TIME (SURF_INDEX_WM_UBO(12)) /** Maximum size of the binding table. */ -#define BRW_MAX_WM_SURFACES SURF_INDEX_WM_UBO(BRW_MAX_WM_UBOS) +#define BRW_MAX_WM_SURFACES (SURF_INDEX_WM_SHADER_TIME + 1) #define SURF_INDEX_VERT_CONST_BUFFER (0) #define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t)) #define SURF_INDEX_VS_UBO(u) (SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) + u) -#define BRW_MAX_VS_SURFACES SURF_INDEX_VS_UBO(BRW_MAX_VS_UBOS) +#define SURF_INDEX_VS_SHADER_TIME (SURF_INDEX_VS_UBO(12)) +#define BRW_MAX_VS_SURFACES (SURF_INDEX_VS_SHADER_TIME + 1) #define SURF_INDEX_SOL_BINDING(t) ((t)) #define BRW_MAX_GS_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS) @@ -651,6 +652,13 @@ struct brw_tracked_state { void (*emit)( struct brw_context *brw ); }; +enum shader_time_shader_type { + ST_NONE, + ST_VS, + ST_FS8, + ST_FS16, +}; + /* Flags for brw->state.cache. 
*/ #define CACHE_NEW_BLEND_STATE (1<<BRW_BLEND_STATE) @@ -1089,6 +1097,16 @@ struct brw_context uint32_t num_instances; int basevertex; + + struct { + drm_intel_bo *bo; + struct gl_shader_program **programs; + enum shader_time_shader_type *types; + uint64_t *cumulative; + int num_entries; + int max_entries; + double report_time; + } shader_time; }; /*====================================================================== @@ -1144,7 +1162,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ); int brw_get_scratch_size(int size); void brw_get_scratch_bo(struct intel_context *intel, drm_intel_bo **scratch_bo, int size); - +void brw_init_shader_time(struct brw_context *brw); +void brw_collect_and_report_shader_time(struct brw_context *brw); +void brw_destroy_shader_time(struct brw_context *brw); /* brw_urb.c */ diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6dc4707..b84d8f9 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -665,6 +665,8 @@ enum opcode { SHADER_OPCODE_TXS, FS_OPCODE_TXB, + SHADER_OPCODE_SHADER_TIME_ADD, + FS_OPCODE_DDX, FS_OPCODE_DDY, FS_OPCODE_PIXEL_X, @@ -729,6 +731,8 @@ enum opcode { #define BRW_ARF_CONTROL 0x80 #define BRW_ARF_NOTIFICATION_COUNT 0x90 #define BRW_ARF_IP 0xA0 +#define BRW_ARF_TDR 0xB0 +#define BRW_ARF_TIMESTAMP 0xC0 #define BRW_MRF_COMPR4 (1 << 7) @@ -956,7 +960,21 @@ enum brw_message_target { #define BRW_SCRATCH_SPACE_SIZE_1M 10 #define BRW_SCRATCH_SPACE_SIZE_2M 11
Maybe add a /** URB Atomic Operations */ comment here?
- +#define BRW_AOP_AND 1 +#define BRW_AOP_OR 2 +#define BRW_AOP_XOR 3 +#define BRW_AOP_MOV 4 +#define BRW_AOP_INC 5 +#define BRW_AOP_DEC 6 +#define BRW_AOP_ADD 7 +#define BRW_AOP_SUB 8 +#define BRW_AOP_REVSUB 9 +#define BRW_AOP_IMAX 10 +#define BRW_AOP_IMIN 11 +#define BRW_AOP_UMAX 12 +#define BRW_AOP_UMIN 13 +#define BRW_AOP_CMPWR 14 +#define BRW_AOP_PREDEC 15
This looks awesome. Series is: Reviewed-by: Kenneth Graunke <[email protected]> _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
