Can we use this instead? https://cgit.freedesktop.org/~mareko/mesa/commit/?h=master&id=65f55ddb4b0140f0beb0868381be5edac64b5137
Marek On Tue, Nov 28, 2017 at 3:45 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > --- > src/gallium/drivers/radeon/r600_pipe_common.c | 431 ----------------------- > src/gallium/drivers/radeon/r600_pipe_common.h | 4 - > src/gallium/drivers/radeonsi/si_pipe.c | 432 > ++++++++++++++++++++++++ > src/gallium/drivers/radeonsi/si_pipe.h | 4 + > src/gallium/drivers/radeonsi/si_state_shaders.c | 14 +- > 5 files changed, 443 insertions(+), 442 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c > b/src/gallium/drivers/radeon/r600_pipe_common.c > index ce612113c51..036f380b0b3 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.c > +++ b/src/gallium/drivers/radeon/r600_pipe_common.c > @@ -29,21 +29,20 @@ > #include "util/u_memory.h" > #include "util/u_format_s3tc.h" > #include "util/u_upload_mgr.h" > #include "util/os_time.h" > #include "vl/vl_decoder.h" > #include "vl/vl_video_buffer.h" > #include "radeon/radeon_video.h" > #include "amd/common/ac_llvm_util.h" > #include "amd/common/sid.h" > #include <inttypes.h> > -#include <sys/utsname.h> > > #include <llvm-c/TargetMachine.h> > > > /* > * shader binary helpers. > */ > void si_radeon_shader_binary_init(struct ac_shader_binary *b) > { > memset(b, 0, sizeof(*b)); > @@ -632,139 +631,20 @@ static const struct debug_named_value > common_debug_options[] = { > { "nodpbb", DBG(NO_DPBB), "Disable DPBB." }, > { "nodfsm", DBG(NO_DFSM), "Disable DFSM." }, > { "dpbb", DBG(DPBB), "Enable DPBB." }, > { "dfsm", DBG(DFSM), "Enable DFSM." }, > { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order > rasterization" }, > { "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per > context." 
}, > > DEBUG_NAMED_VALUE_END /* must be last */ > }; > > -static const char* r600_get_vendor(struct pipe_screen* pscreen) > -{ > - return "X.Org"; > -} > - > -static const char* r600_get_device_vendor(struct pipe_screen* pscreen) > -{ > - return "AMD"; > -} > - > -static const char *r600_get_marketing_name(struct radeon_winsys *ws) > -{ > - if (!ws->get_chip_name) > - return NULL; > - return ws->get_chip_name(ws); > -} > - > -static const char *r600_get_family_name(const struct r600_common_screen > *rscreen) > -{ > - switch (rscreen->info.family) { > - case CHIP_TAHITI: return "AMD TAHITI"; > - case CHIP_PITCAIRN: return "AMD PITCAIRN"; > - case CHIP_VERDE: return "AMD CAPE VERDE"; > - case CHIP_OLAND: return "AMD OLAND"; > - case CHIP_HAINAN: return "AMD HAINAN"; > - case CHIP_BONAIRE: return "AMD BONAIRE"; > - case CHIP_KAVERI: return "AMD KAVERI"; > - case CHIP_KABINI: return "AMD KABINI"; > - case CHIP_HAWAII: return "AMD HAWAII"; > - case CHIP_MULLINS: return "AMD MULLINS"; > - case CHIP_TONGA: return "AMD TONGA"; > - case CHIP_ICELAND: return "AMD ICELAND"; > - case CHIP_CARRIZO: return "AMD CARRIZO"; > - case CHIP_FIJI: return "AMD FIJI"; > - case CHIP_POLARIS10: return "AMD POLARIS10"; > - case CHIP_POLARIS11: return "AMD POLARIS11"; > - case CHIP_POLARIS12: return "AMD POLARIS12"; > - case CHIP_STONEY: return "AMD STONEY"; > - case CHIP_VEGA10: return "AMD VEGA10"; > - case CHIP_RAVEN: return "AMD RAVEN"; > - default: return "AMD unknown"; > - } > -} > - > -static void r600_disk_cache_create(struct r600_common_screen *rscreen) > -{ > - /* Don't use the cache if shader dumping is enabled. 
*/ > - if (rscreen->debug_flags & DBG_ALL_SHADERS) > - return; > - > - /* TODO: remove this once gallium supports a nir cache */ > - if (rscreen->debug_flags & DBG(NIR)) > - return; > - > - uint32_t mesa_timestamp; > - if (disk_cache_get_function_timestamp(r600_disk_cache_create, > - &mesa_timestamp)) { > - char *timestamp_str; > - int res = -1; > - uint32_t llvm_timestamp; > - > - if > (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, > - &llvm_timestamp)) { > - res = asprintf(&timestamp_str, "%u_%u", > - mesa_timestamp, llvm_timestamp); > - } > - > - if (res != -1) { > - /* These flags affect shader compilation. */ > - uint64_t shader_debug_flags = > - rscreen->debug_flags & > - (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | > - DBG(SI_SCHED) | > - DBG(UNSAFE_MATH)); > - > - rscreen->disk_shader_cache = > - > disk_cache_create(r600_get_family_name(rscreen), > - timestamp_str, > - shader_debug_flags); > - free(timestamp_str); > - } > - } > -} > - > -static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen > *pscreen) > -{ > - struct r600_common_screen *rscreen = (struct > r600_common_screen*)pscreen; > - return rscreen->disk_shader_cache; > -} > - > -static const char* r600_get_name(struct pipe_screen* pscreen) > -{ > - struct r600_common_screen *rscreen = (struct > r600_common_screen*)pscreen; > - > - return rscreen->renderer_string; > -} > - > -static float r600_get_paramf(struct pipe_screen* pscreen, > - enum pipe_capf param) > -{ > - switch (param) { > - case PIPE_CAPF_MAX_LINE_WIDTH: > - case PIPE_CAPF_MAX_LINE_WIDTH_AA: > - case PIPE_CAPF_MAX_POINT_WIDTH: > - case PIPE_CAPF_MAX_POINT_WIDTH_AA: > - return 8192.0f; > - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: > - return 16.0f; > - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: > - return 16.0f; > - case PIPE_CAPF_GUARD_BAND_LEFT: > - case PIPE_CAPF_GUARD_BAND_TOP: > - case PIPE_CAPF_GUARD_BAND_RIGHT: > - case PIPE_CAPF_GUARD_BAND_BOTTOM: > - return 0.0f; > - } > - return 0.0f; > -} > - > static int 
r600_get_video_param(struct pipe_screen *screen, > enum pipe_video_profile profile, > enum pipe_video_entrypoint entrypoint, > enum pipe_video_cap param) > { > switch (param) { > case PIPE_VIDEO_CAP_SUPPORTED: > return vl_profile_supported(screen, profile, entrypoint); > case PIPE_VIDEO_CAP_NPOT_TEXTURES: > return 1; > @@ -779,390 +659,79 @@ static int r600_get_video_param(struct pipe_screen > *screen, > return false; > case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: > return true; > case PIPE_VIDEO_CAP_MAX_LEVEL: > return vl_level_supported(screen, profile); > default: > return 0; > } > } > > -static unsigned get_max_threads_per_block(struct r600_common_screen *screen, > - enum pipe_shader_ir ir_type) > -{ > - if (ir_type != PIPE_SHADER_IR_TGSI) > - return 256; > - > - /* Only 16 waves per thread-group on gfx9. */ > - if (screen->chip_class >= GFX9) > - return 1024; > - > - /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice > - * round number. > - */ > - return 2048; > -} > - > -static int r600_get_compute_param(struct pipe_screen *screen, > - enum pipe_shader_ir ir_type, > - enum pipe_compute_cap param, > - void *ret) > -{ > - struct r600_common_screen *rscreen = (struct r600_common_screen > *)screen; > - > - //TODO: select these params by asic > - switch (param) { > - case PIPE_COMPUTE_CAP_IR_TARGET: { > - const char *gpu; > - const char *triple; > - > - if (HAVE_LLVM < 0x0400) > - triple = "amdgcn--"; > - else > - triple = "amdgcn-mesa-mesa3d"; > - > - gpu = ac_get_llvm_processor_name(rscreen->family); > - if (ret) { > - sprintf(ret, "%s-%s", gpu, triple); > - } > - /* +2 for dash and terminating NIL byte */ > - return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); > - } > - case PIPE_COMPUTE_CAP_GRID_DIMENSION: > - if (ret) { > - uint64_t *grid_dimension = ret; > - grid_dimension[0] = 3; > - } > - return 1 * sizeof(uint64_t); > - > - case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: > - if (ret) { > - uint64_t *grid_size = ret; > - grid_size[0] = 65535; > - 
grid_size[1] = 65535; > - grid_size[2] = 65535; > - } > - return 3 * sizeof(uint64_t) ; > - > - case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: > - if (ret) { > - uint64_t *block_size = ret; > - unsigned threads_per_block = > get_max_threads_per_block(rscreen, ir_type); > - block_size[0] = threads_per_block; > - block_size[1] = threads_per_block; > - block_size[2] = threads_per_block; > - } > - return 3 * sizeof(uint64_t); > - > - case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: > - if (ret) { > - uint64_t *max_threads_per_block = ret; > - *max_threads_per_block = > get_max_threads_per_block(rscreen, ir_type); > - } > - return sizeof(uint64_t); > - case PIPE_COMPUTE_CAP_ADDRESS_BITS: > - if (ret) { > - uint32_t *address_bits = ret; > - address_bits[0] = 64; > - } > - return 1 * sizeof(uint32_t); > - > - case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: > - if (ret) { > - uint64_t *max_global_size = ret; > - uint64_t max_mem_alloc_size; > - > - r600_get_compute_param(screen, ir_type, > - PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, > - &max_mem_alloc_size); > - > - /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least > - * 1/4 of the MAX_GLOBAL_SIZE. Since the > - * MAX_MEM_ALLOC_SIZE is fixed for older kernels, > - * make sure we never report more than > - * 4 * MAX_MEM_ALLOC_SIZE. > - */ > - *max_global_size = MIN2(4 * max_mem_alloc_size, > - MAX2(rscreen->info.gart_size, > - > rscreen->info.vram_size)); > - } > - return sizeof(uint64_t); > - > - case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: > - if (ret) { > - uint64_t *max_local_size = ret; > - /* Value reported by the closed source driver. */ > - *max_local_size = 32768; > - } > - return sizeof(uint64_t); > - > - case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: > - if (ret) { > - uint64_t *max_input_size = ret; > - /* Value reported by the closed source driver. 
*/ > - *max_input_size = 1024; > - } > - return sizeof(uint64_t); > - > - case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: > - if (ret) { > - uint64_t *max_mem_alloc_size = ret; > - > - *max_mem_alloc_size = rscreen->info.max_alloc_size; > - } > - return sizeof(uint64_t); > - > - case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: > - if (ret) { > - uint32_t *max_clock_frequency = ret; > - *max_clock_frequency = rscreen->info.max_shader_clock; > - } > - return sizeof(uint32_t); > - > - case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: > - if (ret) { > - uint32_t *max_compute_units = ret; > - *max_compute_units = > rscreen->info.num_good_compute_units; > - } > - return sizeof(uint32_t); > - > - case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: > - if (ret) { > - uint32_t *images_supported = ret; > - *images_supported = 0; > - } > - return sizeof(uint32_t); > - case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: > - break; /* unused */ > - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: > - if (ret) { > - uint32_t *subgroup_size = ret; > - *subgroup_size = 64; > - } > - return sizeof(uint32_t); > - case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: > - if (ret) { > - uint64_t *max_variable_threads_per_block = ret; > - if (ir_type == PIPE_SHADER_IR_TGSI) > - *max_variable_threads_per_block = > SI_MAX_VARIABLE_THREADS_PER_BLOCK; > - else > - *max_variable_threads_per_block = 0; > - } > - return sizeof(uint64_t); > - } > - > - fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); > - return 0; > -} > - > -static uint64_t r600_get_timestamp(struct pipe_screen *screen) > -{ > - struct r600_common_screen *rscreen = (struct > r600_common_screen*)screen; > - > - return 1000000 * rscreen->ws->query_value(rscreen->ws, > RADEON_TIMESTAMP) / > - rscreen->info.clock_crystal_freq; > -} > - > -static void r600_query_memory_info(struct pipe_screen *screen, > - struct pipe_memory_info *info) > -{ > - struct r600_common_screen *rscreen = (struct > r600_common_screen*)screen; > - struct radeon_winsys *ws = rscreen->ws; > - unsigned 
vram_usage, gtt_usage; > - > - info->total_device_memory = rscreen->info.vram_size / 1024; > - info->total_staging_memory = rscreen->info.gart_size / 1024; > - > - /* The real TTM memory usage is somewhat random, because: > - * > - * 1) TTM delays freeing memory, because it can only free it after > - * fences expire. > - * > - * 2) The memory usage can be really low if big VRAM evictions are > - * taking place, but the real usage is well above the size of VRAM. > - * > - * Instead, return statistics of this process. > - */ > - vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; > - gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024; > - > - info->avail_device_memory = > - vram_usage <= info->total_device_memory ? > - info->total_device_memory - vram_usage : 0; > - info->avail_staging_memory = > - gtt_usage <= info->total_staging_memory ? > - info->total_staging_memory - gtt_usage : 0; > - > - info->device_memory_evicted = > - ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; > - > - if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) > - info->nr_device_memory_evictions = > - ws->query_value(ws, RADEON_NUM_EVICTIONS); > - else > - /* Just return the number of evicted 64KB pages. 
*/ > - info->nr_device_memory_evictions = > info->device_memory_evicted / 64; > -} > - > struct pipe_resource *si_resource_create_common(struct pipe_screen *screen, > const struct pipe_resource > *templ) > { > if (templ->target == PIPE_BUFFER) { > return si_buffer_create(screen, templ, 256); > } else { > return si_texture_create(screen, templ); > } > } > > bool si_common_screen_init(struct r600_common_screen *rscreen, > struct radeon_winsys *ws) > { > - char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] > = {}; > - struct utsname uname_data; > - const char *chip_name; > - > - ws->query_info(ws, &rscreen->info); > - rscreen->ws = ws; > - > - if ((chip_name = r600_get_marketing_name(ws))) > - snprintf(family_name, sizeof(family_name), "%s / ", > - r600_get_family_name(rscreen) + 4); > - else > - chip_name = r600_get_family_name(rscreen); > - > - if (uname(&uname_data) == 0) > - snprintf(kernel_version, sizeof(kernel_version), > - " / %s", uname_data.release); > - > - if (HAVE_LLVM > 0) { > - snprintf(llvm_string, sizeof(llvm_string), > - ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, > - HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); > - } > - > - snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string), > - "%s (%sDRM %i.%i.%i%s%s)", > - chip_name, family_name, rscreen->info.drm_major, > - rscreen->info.drm_minor, rscreen->info.drm_patchlevel, > - kernel_version, llvm_string); > - > - rscreen->b.get_name = r600_get_name; > - rscreen->b.get_vendor = r600_get_vendor; > - rscreen->b.get_device_vendor = r600_get_device_vendor; > - rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache; > - rscreen->b.get_compute_param = r600_get_compute_param; > - rscreen->b.get_paramf = r600_get_paramf; > - rscreen->b.get_timestamp = r600_get_timestamp; > rscreen->b.resource_destroy = u_resource_destroy_vtbl; > rscreen->b.resource_from_user_memory = si_buffer_from_user_memory; > - rscreen->b.query_memory_info = r600_query_memory_info; > > if 
(rscreen->info.has_hw_decode) { > rscreen->b.get_video_param = si_vid_get_video_param; > rscreen->b.is_video_format_supported = > si_vid_is_format_supported; > } else { > rscreen->b.get_video_param = r600_get_video_param; > rscreen->b.is_video_format_supported = > vl_video_buffer_is_format_supported; > } > > si_init_screen_texture_functions(rscreen); > si_init_screen_query_functions(rscreen); > > rscreen->family = rscreen->info.family; > rscreen->chip_class = rscreen->info.chip_class; > rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", > common_debug_options, 0); > rscreen->has_rbplus = false; > rscreen->rbplus_allowed = false; > > - r600_disk_cache_create(rscreen); > - > slab_create_parent(&rscreen->pool_transfers, sizeof(struct > r600_transfer), 64); > > rscreen->force_aniso = MIN2(16, > debug_get_num_option("R600_TEX_ANISO", -1)); > if (rscreen->force_aniso >= 0) { > printf("radeon: Forcing anisotropy filter to %ix\n", > /* round down to a power of two */ > 1 << util_logbase2(rscreen->force_aniso)); > } > > (void) mtx_init(&rscreen->aux_context_lock, mtx_plain); > (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain); > > - if (rscreen->debug_flags & DBG(INFO)) { > - printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", > - rscreen->info.pci_domain, rscreen->info.pci_bus, > - rscreen->info.pci_dev, rscreen->info.pci_func); > - printf("pci_id = 0x%x\n", rscreen->info.pci_id); > - printf("family = %i (%s)\n", rscreen->info.family, > - r600_get_family_name(rscreen)); > - printf("chip_class = %i\n", rscreen->info.chip_class); > - printf("pte_fragment_size = %u\n", > rscreen->info.pte_fragment_size); > - printf("gart_page_size = %u\n", rscreen->info.gart_page_size); > - printf("gart_size = %i MB\n", > (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024)); > - printf("vram_size = %i MB\n", > (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024)); > - printf("vram_vis_size = %i MB\n", > (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024)); > - 
printf("max_alloc_size = %i MB\n", > - (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, > 1024*1024)); > - printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size); > - printf("has_dedicated_vram = %u\n", > rscreen->info.has_dedicated_vram); > - printf("has_virtual_memory = %i\n", > rscreen->info.has_virtual_memory); > - printf("gfx_ib_pad_with_type2 = %i\n", > rscreen->info.gfx_ib_pad_with_type2); > - printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode); > - printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings); > - printf("num_compute_rings = %u\n", > rscreen->info.num_compute_rings); > - printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version); > - printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version); > - printf("me_fw_version = %i\n", rscreen->info.me_fw_version); > - printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature); > - printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version); > - printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature); > - printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version); > - printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature); > - printf("vce_harvest_config = %i\n", > rscreen->info.vce_harvest_config); > - printf("clock_crystal_freq = %i\n", > rscreen->info.clock_crystal_freq); > - printf("tcc_cache_line_size = %u\n", > rscreen->info.tcc_cache_line_size); > - printf("drm = %i.%i.%i\n", rscreen->info.drm_major, > - rscreen->info.drm_minor, rscreen->info.drm_patchlevel); > - printf("has_userptr = %i\n", rscreen->info.has_userptr); > - printf("has_syncobj = %u\n", rscreen->info.has_syncobj); > - printf("has_sync_file = %u\n", rscreen->info.has_sync_file); > - > - printf("r600_max_quad_pipes = %i\n", > rscreen->info.r600_max_quad_pipes); > - printf("max_shader_clock = %i\n", > rscreen->info.max_shader_clock); > - printf("num_good_compute_units = %i\n", > rscreen->info.num_good_compute_units); > - printf("max_se = %i\n", rscreen->info.max_se); > - printf("max_sh_per_se = 
%i\n", rscreen->info.max_sh_per_se); > - > - printf("r600_gb_backend_map = %i\n", > rscreen->info.r600_gb_backend_map); > - printf("r600_gb_backend_map_valid = %i\n", > rscreen->info.r600_gb_backend_map_valid); > - printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks); > - printf("num_render_backends = %i\n", > rscreen->info.num_render_backends); > - printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes); > - printf("pipe_interleave_bytes = %i\n", > rscreen->info.pipe_interleave_bytes); > - printf("enabled_rb_mask = 0x%x\n", > rscreen->info.enabled_rb_mask); > - printf("max_alignment = %u\n", > (unsigned)rscreen->info.max_alignment); > - } > return true; > } > > void si_destroy_common_screen(struct r600_common_screen *rscreen) > { > si_perfcounters_destroy(rscreen); > si_gpu_load_kill_thread(rscreen); > > mtx_destroy(&rscreen->gpu_load_mutex); > mtx_destroy(&rscreen->aux_context_lock); > rscreen->aux_context->destroy(rscreen->aux_context); > > slab_destroy_parent(&rscreen->pool_transfers); > > - disk_cache_destroy(rscreen->disk_shader_cache); > rscreen->ws->destroy(rscreen->ws); > FREE(rscreen); > } > > bool si_can_dump_shader(struct r600_common_screen *rscreen, > unsigned processor) > { > return rscreen->debug_flags & (1 << processor); > } > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h > b/src/gallium/drivers/radeon/r600_pipe_common.h > index adfcc7c8a70..4b80d188fba 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.h > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h > @@ -387,22 +387,20 @@ struct r600_memory_object { > struct r600_common_screen { > struct pipe_screen b; > struct radeon_winsys *ws; > enum radeon_family family; > enum chip_class chip_class; > struct radeon_info info; > uint64_t debug_flags; > bool has_rbplus; /* if RB+ registers > exist */ > bool rbplus_allowed; /* if RB+ is allowed > */ > > - struct disk_cache *disk_shader_cache; > - > struct slab_parent_pool pool_transfers; > > /* Texture filter 
settings. */ > int force_aniso; /* -1 = disabled */ > > /* Auxiliary context. Mainly used to initialize resources. > * It must be locked prior to using and flushed before unlocking. */ > struct pipe_context *aux_context; > mtx_t aux_context_lock; > > @@ -415,22 +413,20 @@ struct r600_common_screen { > */ > unsigned num_shaders_created; > unsigned num_shader_cache_hits; > > /* GPU load thread. */ > mtx_t gpu_load_mutex; > thrd_t gpu_load_thread; > union r600_mmio_counters mmio_counters; > volatile unsigned gpu_load_stop_thread; /* bool */ > > - char renderer_string[100]; > - > /* Performance counters. */ > struct r600_perfcounters *perfcounters; > > /* If pipe_screen wants to recompute and re-emit the framebuffer, > * sampler, and image states of all contexts, it should atomically > * increment this. > * > * Each context will compare this with its own last known value of > * the counter before drawing and re-emit the states accordingly. > */ > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index b3d8ae508bd..b38c55619f7 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -31,20 +31,22 @@ > #include "util/u_log.h" > #include "util/u_memory.h" > #include "util/u_suballoc.h" > #include "util/u_tests.h" > #include "util/xmlconfig.h" > #include "vl/vl_decoder.h" > #include "../ddebug/dd_util.h" > > #include "compiler/nir/nir.h" > > +#include <sys/utsname.h> > + > /* > * pipe_context > */ > static void si_destroy_context(struct pipe_context *context) > { > struct si_context *sctx = (struct si_context *)context; > int i; > > /* Unreference the framebuffer normally to disable related logic > * properly. > @@ -394,20 +396,306 @@ static struct pipe_context > *si_pipe_create_context(struct pipe_screen *screen, > * implementation for fence_server_sync is incomplete. 
*/ > return threaded_context_create(ctx, &sscreen->b.pool_transfers, > si_replace_buffer_storage, > sscreen->b.info.drm_major >= 3 ? > si_create_fence : NULL, > &((struct si_context*)ctx)->b.tc); > } > > /* > * pipe_screen > */ > +static const char* si_get_vendor(struct pipe_screen* pscreen) > +{ > + return "X.Org"; > +} > + > +static const char* si_get_device_vendor(struct pipe_screen* pscreen) > +{ > + return "AMD"; > +} > + > +static const char *si_get_marketing_name(struct radeon_winsys *ws) > +{ > + if (!ws->get_chip_name) > + return NULL; > + return ws->get_chip_name(ws); > +} > + > +static const char *si_get_family_name(const struct si_screen *screen) > +{ > + switch (screen->b.info.family) { > + case CHIP_TAHITI: return "AMD TAHITI"; > + case CHIP_PITCAIRN: return "AMD PITCAIRN"; > + case CHIP_VERDE: return "AMD CAPE VERDE"; > + case CHIP_OLAND: return "AMD OLAND"; > + case CHIP_HAINAN: return "AMD HAINAN"; > + case CHIP_BONAIRE: return "AMD BONAIRE"; > + case CHIP_KAVERI: return "AMD KAVERI"; > + case CHIP_KABINI: return "AMD KABINI"; > + case CHIP_HAWAII: return "AMD HAWAII"; > + case CHIP_MULLINS: return "AMD MULLINS"; > + case CHIP_TONGA: return "AMD TONGA"; > + case CHIP_ICELAND: return "AMD ICELAND"; > + case CHIP_CARRIZO: return "AMD CARRIZO"; > + case CHIP_FIJI: return "AMD FIJI"; > + case CHIP_POLARIS10: return "AMD POLARIS10"; > + case CHIP_POLARIS11: return "AMD POLARIS11"; > + case CHIP_POLARIS12: return "AMD POLARIS12"; > + case CHIP_STONEY: return "AMD STONEY"; > + case CHIP_VEGA10: return "AMD VEGA10"; > + case CHIP_RAVEN: return "AMD RAVEN"; > + default: return "AMD unknown"; > + } > +} > + > +static void si_disk_cache_create(struct si_screen *screen) > +{ > + /* Don't use the cache if shader dumping is enabled. 
*/ > + if (screen->b.debug_flags & DBG_ALL_SHADERS) > + return; > + > + /* TODO: remove this once gallium supports a nir cache */ > + if (screen->b.debug_flags & DBG(NIR)) > + return; > + > + uint32_t mesa_timestamp; > + if (disk_cache_get_function_timestamp(si_disk_cache_create, > + &mesa_timestamp)) { > + char *timestamp_str; > + int res = -1; > + uint32_t llvm_timestamp; > + > + if > (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, > + &llvm_timestamp)) { > + res = asprintf(&timestamp_str, "%u_%u", > + mesa_timestamp, llvm_timestamp); > + } > + > + if (res != -1) { > + /* These flags affect shader compilation. */ > + uint64_t shader_debug_flags = > + screen->b.debug_flags & > + (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | > + DBG(SI_SCHED) | > + DBG(UNSAFE_MATH)); > + > + screen->disk_shader_cache = > + disk_cache_create(si_get_family_name(screen), > + timestamp_str, > + shader_debug_flags); > + free(timestamp_str); > + } > + } > +} > + > +static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen > *pscreen) > +{ > + struct si_screen *sscreen = (struct si_screen*)pscreen; > + return sscreen->disk_shader_cache; > +} > + > +static const char* si_get_name(struct pipe_screen* pscreen) > +{ > + struct si_screen *sscreen = (struct si_screen*)pscreen; > + > + return sscreen->renderer_string; > +} > + > +static float si_get_paramf(struct pipe_screen* pscreen, > + enum pipe_capf param) > +{ > + switch (param) { > + case PIPE_CAPF_MAX_LINE_WIDTH: > + case PIPE_CAPF_MAX_LINE_WIDTH_AA: > + case PIPE_CAPF_MAX_POINT_WIDTH: > + case PIPE_CAPF_MAX_POINT_WIDTH_AA: > + return 8192.0f; > + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: > + return 16.0f; > + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: > + return 16.0f; > + case PIPE_CAPF_GUARD_BAND_LEFT: > + case PIPE_CAPF_GUARD_BAND_TOP: > + case PIPE_CAPF_GUARD_BAND_RIGHT: > + case PIPE_CAPF_GUARD_BAND_BOTTOM: > + return 0.0f; > + } > + return 0.0f; > +} > + > +static unsigned get_max_threads_per_block(struct si_screen *screen, 
> + enum pipe_shader_ir ir_type) > +{ > + if (ir_type != PIPE_SHADER_IR_TGSI) > + return 256; > + > + /* Only 16 waves per thread-group on gfx9. */ > + if (screen->b.chip_class >= GFX9) > + return 1024; > + > + /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice > + * round number. > + */ > + return 2048; > +} > + > +static int si_get_compute_param(struct pipe_screen *screen, > + enum pipe_shader_ir ir_type, > + enum pipe_compute_cap param, > + void *ret) > +{ > + struct si_screen *sscreen = (struct si_screen *)screen; > + > + switch (param) { > + case PIPE_COMPUTE_CAP_IR_TARGET: { > + const char *gpu; > + const char *triple; > + > + if (HAVE_LLVM < 0x0400) > + triple = "amdgcn--"; > + else > + triple = "amdgcn-mesa-mesa3d"; > + > + gpu = ac_get_llvm_processor_name(sscreen->b.family); > + if (ret) { > + sprintf(ret, "%s-%s", gpu, triple); > + } > + /* +2 for dash and terminating NIL byte */ > + return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); > + } > + case PIPE_COMPUTE_CAP_GRID_DIMENSION: > + if (ret) { > + uint64_t *grid_dimension = ret; > + grid_dimension[0] = 3; > + } > + return 1 * sizeof(uint64_t); > + > + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: > + if (ret) { > + uint64_t *grid_size = ret; > + grid_size[0] = 65535; > + grid_size[1] = 65535; > + grid_size[2] = 65535; > + } > + return 3 * sizeof(uint64_t) ; > + > + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: > + if (ret) { > + uint64_t *block_size = ret; > + unsigned threads_per_block = > get_max_threads_per_block(sscreen, ir_type); > + block_size[0] = threads_per_block; > + block_size[1] = threads_per_block; > + block_size[2] = threads_per_block; > + } > + return 3 * sizeof(uint64_t); > + > + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: > + if (ret) { > + uint64_t *max_threads_per_block = ret; > + *max_threads_per_block = > get_max_threads_per_block(sscreen, ir_type); > + } > + return sizeof(uint64_t); > + case PIPE_COMPUTE_CAP_ADDRESS_BITS: > + if (ret) { > + uint32_t *address_bits = ret; > + 
address_bits[0] = 64; > + } > + return 1 * sizeof(uint32_t); > + > + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: > + if (ret) { > + uint64_t *max_global_size = ret; > + uint64_t max_mem_alloc_size; > + > + si_get_compute_param(screen, ir_type, > + PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, > + &max_mem_alloc_size); > + > + /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least > + * 1/4 of the MAX_GLOBAL_SIZE. Since the > + * MAX_MEM_ALLOC_SIZE is fixed for older kernels, > + * make sure we never report more than > + * 4 * MAX_MEM_ALLOC_SIZE. > + */ > + *max_global_size = MIN2(4 * max_mem_alloc_size, > + > MAX2(sscreen->b.info.gart_size, > + > sscreen->b.info.vram_size)); > + } > + return sizeof(uint64_t); > + > + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: > + if (ret) { > + uint64_t *max_local_size = ret; > + /* Value reported by the closed source driver. */ > + *max_local_size = 32768; > + } > + return sizeof(uint64_t); > + > + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: > + if (ret) { > + uint64_t *max_input_size = ret; > + /* Value reported by the closed source driver. 
*/ > + *max_input_size = 1024; > + } > + return sizeof(uint64_t); > + > + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: > + if (ret) { > + uint64_t *max_mem_alloc_size = ret; > + > + *max_mem_alloc_size = sscreen->b.info.max_alloc_size; > + } > + return sizeof(uint64_t); > + > + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: > + if (ret) { > + uint32_t *max_clock_frequency = ret; > + *max_clock_frequency = > sscreen->b.info.max_shader_clock; > + } > + return sizeof(uint32_t); > + > + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: > + if (ret) { > + uint32_t *max_compute_units = ret; > + *max_compute_units = > sscreen->b.info.num_good_compute_units; > + } > + return sizeof(uint32_t); > + > + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: > + if (ret) { > + uint32_t *images_supported = ret; > + *images_supported = 0; > + } > + return sizeof(uint32_t); > + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: > + break; /* unused */ > + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: > + if (ret) { > + uint32_t *subgroup_size = ret; > + *subgroup_size = 64; > + } > + return sizeof(uint32_t); > + case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: > + if (ret) { > + uint64_t *max_variable_threads_per_block = ret; > + if (ir_type == PIPE_SHADER_IR_TGSI) > + *max_variable_threads_per_block = > SI_MAX_VARIABLE_THREADS_PER_BLOCK; > + else > + *max_variable_threads_per_block = 0; > + } > + return sizeof(uint64_t); > + } > + > + fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); > + return 0; > +} > + > static bool si_have_tgsi_compute(struct si_screen *sscreen) > { > /* Old kernels disallowed some register writes for SI > * that are used for indirect dispatches. 
*/ > return (sscreen->b.chip_class >= CIK || > sscreen->b.info.drm_major == 3 || > (sscreen->b.info.drm_major == 2 && > sscreen->b.info.drm_minor >= 45)); > } > > @@ -823,20 +1111,69 @@ static const struct nir_shader_compiler_options > nir_options = { > > static const void * > si_get_compiler_options(struct pipe_screen *screen, > enum pipe_shader_ir ir, > enum pipe_shader_type shader) > { > assert(ir == PIPE_SHADER_IR_NIR); > return &nir_options; > } > > +static uint64_t si_get_timestamp(struct pipe_screen *screen) > +{ > + struct r600_common_screen *rscreen = (struct > r600_common_screen*)screen; > + > + return 1000000 * rscreen->ws->query_value(rscreen->ws, > RADEON_TIMESTAMP) / > + rscreen->info.clock_crystal_freq; > +} > + > +static void si_query_memory_info(struct pipe_screen *screen, > + struct pipe_memory_info *info) > +{ > + struct r600_common_screen *rscreen = (struct > r600_common_screen*)screen; > + struct radeon_winsys *ws = rscreen->ws; > + unsigned vram_usage, gtt_usage; > + > + info->total_device_memory = rscreen->info.vram_size / 1024; > + info->total_staging_memory = rscreen->info.gart_size / 1024; > + > + /* The real TTM memory usage is somewhat random, because: > + * > + * 1) TTM delays freeing memory, because it can only free it after > + * fences expire. > + * > + * 2) The memory usage can be really low if big VRAM evictions are > + * taking place, but the real usage is well above the size of VRAM. > + * > + * Instead, return statistics of this process. > + */ > + vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; > + gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024; > + > + info->avail_device_memory = > + vram_usage <= info->total_device_memory ? > + info->total_device_memory - vram_usage : 0; > + info->avail_staging_memory = > + gtt_usage <= info->total_staging_memory ? 
> + info->total_staging_memory - gtt_usage : 0; > + > + info->device_memory_evicted = > + ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; > + > + if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) > + info->nr_device_memory_evictions = > + ws->query_value(ws, RADEON_NUM_EVICTIONS); > + else > + /* Just return the number of evicted 64KB pages. */ > + info->nr_device_memory_evictions = > info->device_memory_evicted / 64; > +} > + > static void si_destroy_screen(struct pipe_screen* pscreen) > { > struct si_screen *sscreen = (struct si_screen *)pscreen; > struct si_shader_part *parts[] = { > sscreen->vs_prologs, > sscreen->tcs_epilogs, > sscreen->gs_prologs, > sscreen->ps_prologs, > sscreen->ps_epilogs > }; > @@ -861,20 +1198,21 @@ static void si_destroy_screen(struct pipe_screen* > pscreen) > while (parts[i]) { > struct si_shader_part *part = parts[i]; > > parts[i] = part->next; > si_radeon_shader_binary_clean(&part->binary); > FREE(part); > } > } > mtx_destroy(&sscreen->shader_parts_mutex); > si_destroy_shader_cache(sscreen); > + disk_cache_destroy(sscreen->disk_shader_cache); > si_destroy_common_screen(&sscreen->b); > } > > static bool si_init_gs_info(struct si_screen *sscreen) > { > /* gs_table_depth is not used by GFX9 */ > if (sscreen->b.chip_class >= GFX9) > return true; > > switch (sscreen->b.family) { > @@ -977,34 +1315,71 @@ static void radeonsi_get_device_uuid(struct > pipe_screen *pscreen, char *uuid) > { > struct r600_common_screen *rscreen = (struct r600_common_screen > *)pscreen; > > ac_compute_device_uuid(&rscreen->info, uuid, PIPE_UUID_SIZE); > } > > struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, > const struct pipe_screen_config > *config) > { > struct si_screen *sscreen = CALLOC_STRUCT(si_screen); > + char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] > = {}; > unsigned num_threads, num_compiler_threads, > num_compiler_threads_lowprio, i; > + struct utsname uname_data; > + const char 
*chip_name; > > if (!sscreen) { > return NULL; > } > > + > + ws->query_info(ws, &sscreen->b.info); > + sscreen->b.ws = ws; > + > + if ((chip_name = si_get_marketing_name(ws))) > + snprintf(family_name, sizeof(family_name), "%s / ", > + si_get_family_name(sscreen) + 4); > + else > + chip_name = si_get_family_name(sscreen); > + > + if (uname(&uname_data) == 0) > + snprintf(kernel_version, sizeof(kernel_version), > + " / %s", uname_data.release); > + > + if (HAVE_LLVM > 0) { > + snprintf(llvm_string, sizeof(llvm_string), > + ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, > + HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); > + } > + > + snprintf(sscreen->renderer_string, sizeof(sscreen->renderer_string), > + "%s (%sDRM %i.%i.%i%s%s)", > + chip_name, family_name, sscreen->b.info.drm_major, > + sscreen->b.info.drm_minor, sscreen->b.info.drm_patchlevel, > + kernel_version, llvm_string); > + > /* Set functions first. */ > sscreen->b.b.context_create = si_pipe_create_context; > sscreen->b.b.destroy = si_destroy_screen; > + sscreen->b.b.get_name = si_get_name; > + sscreen->b.b.get_vendor = si_get_vendor; > + sscreen->b.b.get_device_vendor = si_get_device_vendor; > + sscreen->b.b.get_disk_shader_cache = si_get_disk_shader_cache; > + sscreen->b.b.get_compute_param = si_get_compute_param; > + sscreen->b.b.get_paramf = si_get_paramf; > sscreen->b.b.get_param = si_get_param; > sscreen->b.b.get_shader_param = si_get_shader_param; > sscreen->b.b.get_compiler_options = si_get_compiler_options; > sscreen->b.b.get_device_uuid = radeonsi_get_device_uuid; > sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid; > + sscreen->b.b.get_timestamp = si_get_timestamp; > + sscreen->b.b.query_memory_info = si_query_memory_info; > sscreen->b.b.resource_create = si_resource_create_common; > > si_init_screen_fence_functions(sscreen); > si_init_screen_state_functions(sscreen); > > /* Set these flags in debug_flags early, so that the shader cache > takes > * them into account. 
> */ > if (driQueryOptionb(config->options, > "glsl_correct_derivatives_after_discard")) > @@ -1012,20 +1387,22 @@ struct pipe_screen *radeonsi_screen_create(struct > radeon_winsys *ws, > if (driQueryOptionb(config->options, "radeonsi_enable_sisched")) > sscreen->b.debug_flags |= DBG(SI_SCHED); > > if (!si_common_screen_init(&sscreen->b, ws) || > !si_init_gs_info(sscreen) || > !si_init_shader_cache(sscreen)) { > FREE(sscreen); > return NULL; > } > > + si_disk_cache_create(sscreen); > + > /* Only enable as many threads as we have target machines, but at most > * the number of CPUs - 1 if there is more than one. > */ > num_threads = sysconf(_SC_NPROCESSORS_ONLN); > num_threads = MAX2(1, num_threads - 1); > num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm)); > num_compiler_threads_lowprio = > MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority)); > > if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader", > @@ -1144,20 +1521,75 @@ struct pipe_screen *radeonsi_screen_create(struct > radeon_winsys *ws, > sscreen->b.debug_flags |= DBG_ALL_SHADERS; > > for (i = 0; i < num_compiler_threads; i++) > sscreen->tm[i] = si_create_llvm_target_machine(sscreen); > for (i = 0; i < num_compiler_threads_lowprio; i++) > sscreen->tm_low_priority[i] = > si_create_llvm_target_machine(sscreen); > > /* Create the auxiliary context. This must be done last. 
*/ > sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0); > > + if (sscreen->b.debug_flags & DBG(INFO)) { > + printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", > + sscreen->b.info.pci_domain, sscreen->b.info.pci_bus, > + sscreen->b.info.pci_dev, sscreen->b.info.pci_func); > + printf("pci_id = 0x%x\n", sscreen->b.info.pci_id); > + printf("family = %i (%s)\n", sscreen->b.info.family, > + si_get_family_name(sscreen)); > + printf("chip_class = %i\n", sscreen->b.info.chip_class); > + printf("pte_fragment_size = %u\n", > sscreen->b.info.pte_fragment_size); > + printf("gart_page_size = %u\n", > sscreen->b.info.gart_page_size); > + printf("gart_size = %i MB\n", > (int)DIV_ROUND_UP(sscreen->b.info.gart_size, 1024*1024)); > + printf("vram_size = %i MB\n", > (int)DIV_ROUND_UP(sscreen->b.info.vram_size, 1024*1024)); > + printf("vram_vis_size = %i MB\n", > (int)DIV_ROUND_UP(sscreen->b.info.vram_vis_size, 1024*1024)); > + printf("max_alloc_size = %i MB\n", > + (int)DIV_ROUND_UP(sscreen->b.info.max_alloc_size, > 1024*1024)); > + printf("min_alloc_size = %u\n", > sscreen->b.info.min_alloc_size); > + printf("has_dedicated_vram = %u\n", > sscreen->b.info.has_dedicated_vram); > + printf("has_virtual_memory = %i\n", > sscreen->b.info.has_virtual_memory); > + printf("gfx_ib_pad_with_type2 = %i\n", > sscreen->b.info.gfx_ib_pad_with_type2); > + printf("has_hw_decode = %u\n", sscreen->b.info.has_hw_decode); > + printf("num_sdma_rings = %i\n", > sscreen->b.info.num_sdma_rings); > + printf("num_compute_rings = %u\n", > sscreen->b.info.num_compute_rings); > + printf("uvd_fw_version = %u\n", > sscreen->b.info.uvd_fw_version); > + printf("vce_fw_version = %u\n", > sscreen->b.info.vce_fw_version); > + printf("me_fw_version = %i\n", sscreen->b.info.me_fw_version); > + printf("me_fw_feature = %i\n", sscreen->b.info.me_fw_feature); > + printf("pfp_fw_version = %i\n", > sscreen->b.info.pfp_fw_version); > + printf("pfp_fw_feature = %i\n", > sscreen->b.info.pfp_fw_feature); > + 
printf("ce_fw_version = %i\n", sscreen->b.info.ce_fw_version); > + printf("ce_fw_feature = %i\n", sscreen->b.info.ce_fw_feature); > + printf("vce_harvest_config = %i\n", > sscreen->b.info.vce_harvest_config); > + printf("clock_crystal_freq = %i\n", > sscreen->b.info.clock_crystal_freq); > + printf("tcc_cache_line_size = %u\n", > sscreen->b.info.tcc_cache_line_size); > + printf("drm = %i.%i.%i\n", sscreen->b.info.drm_major, > + sscreen->b.info.drm_minor, > sscreen->b.info.drm_patchlevel); > + printf("has_userptr = %i\n", sscreen->b.info.has_userptr); > + printf("has_syncobj = %u\n", sscreen->b.info.has_syncobj); > + printf("has_sync_file = %u\n", sscreen->b.info.has_sync_file); > + > + printf("r600_max_quad_pipes = %i\n", > sscreen->b.info.r600_max_quad_pipes); > + printf("max_shader_clock = %i\n", > sscreen->b.info.max_shader_clock); > + printf("num_good_compute_units = %i\n", > sscreen->b.info.num_good_compute_units); > + printf("max_se = %i\n", sscreen->b.info.max_se); > + printf("max_sh_per_se = %i\n", sscreen->b.info.max_sh_per_se); > + > + printf("r600_gb_backend_map = %i\n", > sscreen->b.info.r600_gb_backend_map); > + printf("r600_gb_backend_map_valid = %i\n", > sscreen->b.info.r600_gb_backend_map_valid); > + printf("r600_num_banks = %i\n", > sscreen->b.info.r600_num_banks); > + printf("num_render_backends = %i\n", > sscreen->b.info.num_render_backends); > + printf("num_tile_pipes = %i\n", > sscreen->b.info.num_tile_pipes); > + printf("pipe_interleave_bytes = %i\n", > sscreen->b.info.pipe_interleave_bytes); > + printf("enabled_rb_mask = 0x%x\n", > sscreen->b.info.enabled_rb_mask); > + printf("max_alignment = %u\n", > (unsigned)sscreen->b.info.max_alignment); > + } > + > if (sscreen->b.debug_flags & DBG(TEST_DMA)) > si_test_dma(&sscreen->b); > > if (sscreen->b.debug_flags & (DBG(TEST_VMFAULT_CP) | > DBG(TEST_VMFAULT_SDMA) | > DBG(TEST_VMFAULT_SHADER))) > si_test_vmfault(sscreen); > > return &sscreen->b.b; > } > diff --git 
a/src/gallium/drivers/radeonsi/si_pipe.h > b/src/gallium/drivers/radeonsi/si_pipe.h > index 751441df1bc..a66f9da8658 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.h > +++ b/src/gallium/drivers/radeonsi/si_pipe.h > @@ -99,20 +99,22 @@ struct si_screen { > bool has_msaa_sample_loc_bug; > bool has_ls_vgpr_init_bug; > bool dpbb_allowed; > bool dfsm_allowed; > bool llvm_has_working_vgpr_indexing; > > /* Whether shaders are monolithic (1-part) or separate (3-part). */ > bool use_monolithic_shaders; > bool record_llvm_ir; > > + struct disk_cache *disk_shader_cache; > + > mtx_t shader_parts_mutex; > struct si_shader_part *vs_prologs; > struct si_shader_part *tcs_epilogs; > struct si_shader_part *gs_prologs; > struct si_shader_part *ps_prologs; > struct si_shader_part *ps_epilogs; > > /* Shader cache in memory. > * > * Design & limitations: > @@ -132,20 +134,22 @@ struct si_screen { > struct util_queue shader_compiler_queue; > /* Use at most 3 normal compiler threads on quadcore and better. > * Hyperthreaded CPUs report the number of threads, but we want > * the number of cores. */ > LLVMTargetMachineRef tm[3]; /* used by the queue only */ > > struct util_queue shader_compiler_queue_low_priority; > /* Use at most 2 low priority threads on quadcore and better. > * We want to minimize the impact on multithreaded Mesa. 
*/ > LLVMTargetMachineRef tm_low_priority[2]; /* at most 2 > threads */ > + > + char renderer_string[100]; > }; > > struct si_blend_color { > struct r600_atom atom; > struct pipe_blend_color state; > bool any_nonzeros; > }; > > struct si_sampler_view { > struct pipe_sampler_view base; > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > b/src/gallium/drivers/radeonsi/si_state_shaders.c > index 3edc340f01f..e1c70aaea26 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -199,61 +199,61 @@ static bool si_shader_cache_insert_shader(struct > si_screen *sscreen, > hw_binary = si_get_shader_binary(shader); > if (!hw_binary) > return false; > > if (_mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary, > hw_binary) == NULL) { > FREE(hw_binary); > return false; > } > > - if (sscreen->b.disk_shader_cache && insert_into_disk_cache) { > - disk_cache_compute_key(sscreen->b.disk_shader_cache, > tgsi_binary, > + if (sscreen->disk_shader_cache && insert_into_disk_cache) { > + disk_cache_compute_key(sscreen->disk_shader_cache, > tgsi_binary, > *((uint32_t *)tgsi_binary), key); > - disk_cache_put(sscreen->b.disk_shader_cache, key, hw_binary, > + disk_cache_put(sscreen->disk_shader_cache, key, hw_binary, > *((uint32_t *) hw_binary), NULL); > } > > return true; > } > > static bool si_shader_cache_load_shader(struct si_screen *sscreen, > void *tgsi_binary, > struct si_shader *shader) > { > struct hash_entry *entry = > _mesa_hash_table_search(sscreen->shader_cache, tgsi_binary); > if (!entry) { > - if (sscreen->b.disk_shader_cache) { > + if (sscreen->disk_shader_cache) { > unsigned char sha1[CACHE_KEY_SIZE]; > size_t tg_size = *((uint32_t *) tgsi_binary); > > - disk_cache_compute_key(sscreen->b.disk_shader_cache, > + disk_cache_compute_key(sscreen->disk_shader_cache, > tgsi_binary, tg_size, sha1); > > size_t binary_size; > uint8_t *buffer = > - disk_cache_get(sscreen->b.disk_shader_cache, > + 
disk_cache_get(sscreen->disk_shader_cache, > sha1, &binary_size); > if (!buffer) > return false; > > if (binary_size < sizeof(uint32_t) || > *((uint32_t*)buffer) != binary_size) { > /* Something has gone wrong discard the item > * from the cache and rebuild/link from > * source. > */ > assert(!"Invalid radeonsi shader disk cache " > "item!"); > > - > disk_cache_remove(sscreen->b.disk_shader_cache, > + disk_cache_remove(sscreen->disk_shader_cache, > sha1); > free(buffer); > > return false; > } > > if (!si_load_shader_binary(shader, buffer)) { > free(buffer); > return false; > } > -- > 2.11.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev