From: Marek Olšák <marek.ol...@amd.com> --- src/amd/common/ac_gpu_info.c | 3 +++ src/amd/common/ac_gpu_info.h | 1 + src/gallium/drivers/radeonsi/si_state.c | 4 +--- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 4 ++++ 4 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 8705d878f9a..bfaff45219f 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -407,20 +407,22 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, info->num_sdma_rings = util_bitcount(dma.available_rings); info->num_compute_rings = util_bitcount(compute.available_rings); /* Get the number of good compute units. */ info->num_good_compute_units = 0; for (i = 0; i < info->max_se; i++) for (j = 0; j < info->max_sh_per_se; j++) info->num_good_compute_units += util_bitcount(amdinfo->cu_bitmap[i][j]); + info->num_good_cu_per_sh = info->num_good_compute_units / + (info->max_se * info->max_sh_per_se); memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode, sizeof(amdinfo->gb_tile_mode)); info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask; memcpy(info->cik_macrotile_mode_array, amdinfo->gb_macro_tile_mode, sizeof(amdinfo->gb_macro_tile_mode)); info->pte_fragment_size = alignment_info.size_local; info->gart_page_size = alignment_info.size_remote; @@ -536,20 +538,21 @@ void ac_print_gpu_info(struct radeon_info *info) printf(" kernel_flushes_tc_l2_after_ib = %u\n", info->kernel_flushes_tc_l2_after_ib); printf(" has_indirect_compute_dispatch = %u\n", info->has_indirect_compute_dispatch); printf(" has_unaligned_shader_loads = %u\n", info->has_unaligned_shader_loads); printf(" has_sparse_vm_mappings = %u\n", info->has_sparse_vm_mappings); printf(" has_2d_tiling = %u\n", info->has_2d_tiling); printf(" has_read_registers_query = %u\n", info->has_read_registers_query); printf("Shader core info:\n"); printf(" max_shader_clock = %i\n", info->max_shader_clock); printf(" num_good_compute_units = %i\n", info->num_good_compute_units); + printf(" num_good_cu_per_sh = %i\n", info->num_good_cu_per_sh); printf(" num_tcc_blocks = %i\n", info->num_tcc_blocks); printf(" max_se = %i\n", info->max_se); printf(" max_sh_per_se = %i\n", info->max_sh_per_se); printf("Render backend info:\n"); printf(" num_render_backends = %i\n", info->num_render_backends); printf(" num_tile_pipes = %i\n", info->num_tile_pipes); printf(" pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes); printf(" enabled_rb_mask = 0x%x\n", info->enabled_rb_mask); printf(" max_alignment = %u\n", (unsigned)info->max_alignment); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index a897496da48..0583a6037f2 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -109,20 +109,21 @@ struct radeon_info { bool has_indirect_compute_dispatch; bool has_unaligned_shader_loads; bool has_sparse_vm_mappings; bool has_2d_tiling; bool has_read_registers_query; /* Shader cores. */ uint32_t r600_max_quad_pipes; /* wave size / 16 */ uint32_t max_shader_clock; uint32_t num_good_compute_units; + uint32_t num_good_cu_per_sh; uint32_t num_tcc_blocks; uint32_t max_se; /* shader engines */ uint32_t max_sh_per_se; /* shader arrays per shader engine */ /* Render backends (color + depth blocks). */ uint32_t r300_num_gb_pipes; uint32_t r300_num_z_pipes; uint32_t r600_gb_backend_map; /* R600 harvest config */ bool r600_gb_backend_map_valid; uint32_t r600_num_banks; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 780d9010abc..c9851ff3300 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4921,23 +4921,21 @@ static void si_init_config(struct si_context *sctx) * but we don't use on-chip GS. */ si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4)); } si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); /* Compute LATE_ALLOC_VS.LIMIT. */ - unsigned num_cu_per_sh = sscreen->info.num_good_compute_units / - (sscreen->info.max_se * - sscreen->info.max_sh_per_se); + unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh; unsigned late_alloc_limit; /* The limit is per SH. */ if (sctx->family == CHIP_KABINI) { late_alloc_limit = 0; /* Potential hang on Kabini. */ } else if (num_cu_per_sh <= 4) { /* Too few available compute units per SH. Disallowing * VS to run on one CU could hurt us more than late VS * allocation would help. * * 2 is the highest safe number that allows us to keep diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index f8702e7c601..343c80c600f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -517,20 +517,24 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) ws->info.max_se = 2; break; case CHIP_HAWAII: ws->info.max_se = 4; break; } } radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL, &ws->info.max_sh_per_se); + if (ws->gen == DRV_SI) { + ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units / + (ws->info.max_se * ws->info.max_sh_per_se); + } radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL, &ws->accel_working2); if (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 2) { fprintf(stderr, "radeon: GPU acceleration for Hawaii disabled, " "returned accel_working2 value %u is smaller than 2. " "Please install a newer kernel.\n", ws->accel_working2); return false; } -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev