Re: [Mesa-dev] [PATCH] radeonsi: move caps, vendor/device name, and disk shader cache to radeonsi folder

Marek Olšák Tue, 28 Nov 2017 11:24:49 -0800

Can we use this instead?
https://cgit.freedesktop.org/~mareko/mesa/commit/?h=master&id=65f55ddb4b0140f0beb0868381be5edac64b5137


Marek

On Tue, Nov 28, 2017 at 3:45 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haeh...@amd.com>
>
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.c   | 431 -----------------------
>  src/gallium/drivers/radeon/r600_pipe_common.h   |   4 -
>  src/gallium/drivers/radeonsi/si_pipe.c          | 432 ++++++++++++++++++++++++
>  src/gallium/drivers/radeonsi/si_pipe.h          |   4 +
>  src/gallium/drivers/radeonsi/si_state_shaders.c |  14 +-
>  5 files changed, 443 insertions(+), 442 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
> index ce612113c51..036f380b0b3 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -29,21 +29,20 @@
>  #include "util/u_memory.h"
>  #include "util/u_format_s3tc.h"
>  #include "util/u_upload_mgr.h"
>  #include "util/os_time.h"
>  #include "vl/vl_decoder.h"
>  #include "vl/vl_video_buffer.h"
>  #include "radeon/radeon_video.h"
>  #include "amd/common/ac_llvm_util.h"
>  #include "amd/common/sid.h"
>  #include <inttypes.h>
> -#include <sys/utsname.h>
>
>  #include <llvm-c/TargetMachine.h>
>
>
>  /*
>   * shader binary helpers.
>   */
>  void si_radeon_shader_binary_init(struct ac_shader_binary *b)
>  {
>         memset(b, 0, sizeof(*b));
> @@ -632,139 +631,20 @@ static const struct debug_named_value common_debug_options[] = {
>         { "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
>         { "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
>         { "dpbb", DBG(DPBB), "Enable DPBB." },
>         { "dfsm", DBG(DFSM), "Enable DFSM." },
>         { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order 
> rasterization" },
>         { "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per 
> context." },
>
>         DEBUG_NAMED_VALUE_END /* must be last */
>  };
>
> -static const char* r600_get_vendor(struct pipe_screen* pscreen)
> -{
> -       return "X.Org";
> -}
> -
> -static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
> -{
> -       return "AMD";
> -}
> -
> -static const char *r600_get_marketing_name(struct radeon_winsys *ws)
> -{
> -       if (!ws->get_chip_name)
> -               return NULL;
> -       return ws->get_chip_name(ws);
> -}
> -
> -static const char *r600_get_family_name(const struct r600_common_screen 
> *rscreen)
> -{
> -       switch (rscreen->info.family) {
> -       case CHIP_TAHITI: return "AMD TAHITI";
> -       case CHIP_PITCAIRN: return "AMD PITCAIRN";
> -       case CHIP_VERDE: return "AMD CAPE VERDE";
> -       case CHIP_OLAND: return "AMD OLAND";
> -       case CHIP_HAINAN: return "AMD HAINAN";
> -       case CHIP_BONAIRE: return "AMD BONAIRE";
> -       case CHIP_KAVERI: return "AMD KAVERI";
> -       case CHIP_KABINI: return "AMD KABINI";
> -       case CHIP_HAWAII: return "AMD HAWAII";
> -       case CHIP_MULLINS: return "AMD MULLINS";
> -       case CHIP_TONGA: return "AMD TONGA";
> -       case CHIP_ICELAND: return "AMD ICELAND";
> -       case CHIP_CARRIZO: return "AMD CARRIZO";
> -       case CHIP_FIJI: return "AMD FIJI";
> -       case CHIP_POLARIS10: return "AMD POLARIS10";
> -       case CHIP_POLARIS11: return "AMD POLARIS11";
> -       case CHIP_POLARIS12: return "AMD POLARIS12";
> -       case CHIP_STONEY: return "AMD STONEY";
> -       case CHIP_VEGA10: return "AMD VEGA10";
> -       case CHIP_RAVEN: return "AMD RAVEN";
> -       default: return "AMD unknown";
> -       }
> -}
> -
> -static void r600_disk_cache_create(struct r600_common_screen *rscreen)
> -{
> -       /* Don't use the cache if shader dumping is enabled. */
> -       if (rscreen->debug_flags & DBG_ALL_SHADERS)
> -               return;
> -
> -       /* TODO: remove this once gallium supports a nir cache */
> -       if (rscreen->debug_flags & DBG(NIR))
> -               return;
> -
> -       uint32_t mesa_timestamp;
> -       if (disk_cache_get_function_timestamp(r600_disk_cache_create,
> -                                             &mesa_timestamp)) {
> -               char *timestamp_str;
> -               int res = -1;
> -               uint32_t llvm_timestamp;
> -
> -               if 
> (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
> -                                                     &llvm_timestamp)) {
> -                       res = asprintf(&timestamp_str, "%u_%u",
> -                                      mesa_timestamp, llvm_timestamp);
> -               }
> -
> -               if (res != -1) {
> -                       /* These flags affect shader compilation. */
> -                       uint64_t shader_debug_flags =
> -                               rscreen->debug_flags &
> -                               (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
> -                                DBG(SI_SCHED) |
> -                                DBG(UNSAFE_MATH));
> -
> -                       rscreen->disk_shader_cache =
> -                               
> disk_cache_create(r600_get_family_name(rscreen),
> -                                                 timestamp_str,
> -                                                 shader_debug_flags);
> -                       free(timestamp_str);
> -               }
> -       }
> -}
> -
> -static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen 
> *pscreen)
> -{
> -       struct r600_common_screen *rscreen = (struct 
> r600_common_screen*)pscreen;
> -       return rscreen->disk_shader_cache;
> -}
> -
> -static const char* r600_get_name(struct pipe_screen* pscreen)
> -{
> -       struct r600_common_screen *rscreen = (struct 
> r600_common_screen*)pscreen;
> -
> -       return rscreen->renderer_string;
> -}
> -
> -static float r600_get_paramf(struct pipe_screen* pscreen,
> -                            enum pipe_capf param)
> -{
> -       switch (param) {
> -       case PIPE_CAPF_MAX_LINE_WIDTH:
> -       case PIPE_CAPF_MAX_LINE_WIDTH_AA:
> -       case PIPE_CAPF_MAX_POINT_WIDTH:
> -       case PIPE_CAPF_MAX_POINT_WIDTH_AA:
> -               return 8192.0f;
> -       case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
> -               return 16.0f;
> -       case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
> -               return 16.0f;
> -       case PIPE_CAPF_GUARD_BAND_LEFT:
> -       case PIPE_CAPF_GUARD_BAND_TOP:
> -       case PIPE_CAPF_GUARD_BAND_RIGHT:
> -       case PIPE_CAPF_GUARD_BAND_BOTTOM:
> -               return 0.0f;
> -       }
> -       return 0.0f;
> -}
> -
>  static int r600_get_video_param(struct pipe_screen *screen,
>                                 enum pipe_video_profile profile,
>                                 enum pipe_video_entrypoint entrypoint,
>                                 enum pipe_video_cap param)
>  {
>         switch (param) {
>         case PIPE_VIDEO_CAP_SUPPORTED:
>                 return vl_profile_supported(screen, profile, entrypoint);
>         case PIPE_VIDEO_CAP_NPOT_TEXTURES:
>                 return 1;
> @@ -779,390 +659,79 @@ static int r600_get_video_param(struct pipe_screen *screen,
>                 return false;
>         case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
>                 return true;
>         case PIPE_VIDEO_CAP_MAX_LEVEL:
>                 return vl_level_supported(screen, profile);
>         default:
>                 return 0;
>         }
>  }
>
> -static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
> -                                         enum pipe_shader_ir ir_type)
> -{
> -       if (ir_type != PIPE_SHADER_IR_TGSI)
> -               return 256;
> -
> -       /* Only 16 waves per thread-group on gfx9. */
> -       if (screen->chip_class >= GFX9)
> -               return 1024;
> -
> -       /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
> -        * round number.
> -        */
> -       return 2048;
> -}
> -
> -static int r600_get_compute_param(struct pipe_screen *screen,
> -        enum pipe_shader_ir ir_type,
> -        enum pipe_compute_cap param,
> -        void *ret)
> -{
> -       struct r600_common_screen *rscreen = (struct r600_common_screen 
> *)screen;
> -
> -       //TODO: select these params by asic
> -       switch (param) {
> -       case PIPE_COMPUTE_CAP_IR_TARGET: {
> -               const char *gpu;
> -               const char *triple;
> -
> -               if (HAVE_LLVM < 0x0400)
> -                       triple = "amdgcn--";
> -               else
> -                       triple = "amdgcn-mesa-mesa3d";
> -
> -               gpu = ac_get_llvm_processor_name(rscreen->family);
> -               if (ret) {
> -                       sprintf(ret, "%s-%s", gpu, triple);
> -               }
> -               /* +2 for dash and terminating NIL byte */
> -               return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
> -       }
> -       case PIPE_COMPUTE_CAP_GRID_DIMENSION:
> -               if (ret) {
> -                       uint64_t *grid_dimension = ret;
> -                       grid_dimension[0] = 3;
> -               }
> -               return 1 * sizeof(uint64_t);
> -
> -       case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
> -               if (ret) {
> -                       uint64_t *grid_size = ret;
> -                       grid_size[0] = 65535;
> -                       grid_size[1] = 65535;
> -                       grid_size[2] = 65535;
> -               }
> -               return 3 * sizeof(uint64_t) ;
> -
> -       case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
> -               if (ret) {
> -                       uint64_t *block_size = ret;
> -                       unsigned threads_per_block = 
> get_max_threads_per_block(rscreen, ir_type);
> -                       block_size[0] = threads_per_block;
> -                       block_size[1] = threads_per_block;
> -                       block_size[2] = threads_per_block;
> -               }
> -               return 3 * sizeof(uint64_t);
> -
> -       case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
> -               if (ret) {
> -                       uint64_t *max_threads_per_block = ret;
> -                       *max_threads_per_block = 
> get_max_threads_per_block(rscreen, ir_type);
> -               }
> -               return sizeof(uint64_t);
> -       case PIPE_COMPUTE_CAP_ADDRESS_BITS:
> -               if (ret) {
> -                       uint32_t *address_bits = ret;
> -                       address_bits[0] = 64;
> -               }
> -               return 1 * sizeof(uint32_t);
> -
> -       case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
> -               if (ret) {
> -                       uint64_t *max_global_size = ret;
> -                       uint64_t max_mem_alloc_size;
> -
> -                       r600_get_compute_param(screen, ir_type,
> -                               PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
> -                               &max_mem_alloc_size);
> -
> -                       /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
> -                        * 1/4 of the MAX_GLOBAL_SIZE.  Since the
> -                        * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
> -                        * make sure we never report more than
> -                        * 4 * MAX_MEM_ALLOC_SIZE.
> -                        */
> -                       *max_global_size = MIN2(4 * max_mem_alloc_size,
> -                                               MAX2(rscreen->info.gart_size,
> -                                                    
> rscreen->info.vram_size));
> -               }
> -               return sizeof(uint64_t);
> -
> -       case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
> -               if (ret) {
> -                       uint64_t *max_local_size = ret;
> -                       /* Value reported by the closed source driver. */
> -                       *max_local_size = 32768;
> -               }
> -               return sizeof(uint64_t);
> -
> -       case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
> -               if (ret) {
> -                       uint64_t *max_input_size = ret;
> -                       /* Value reported by the closed source driver. */
> -                       *max_input_size = 1024;
> -               }
> -               return sizeof(uint64_t);
> -
> -       case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
> -               if (ret) {
> -                       uint64_t *max_mem_alloc_size = ret;
> -
> -                       *max_mem_alloc_size = rscreen->info.max_alloc_size;
> -               }
> -               return sizeof(uint64_t);
> -
> -       case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
> -               if (ret) {
> -                       uint32_t *max_clock_frequency = ret;
> -                       *max_clock_frequency = rscreen->info.max_shader_clock;
> -               }
> -               return sizeof(uint32_t);
> -
> -       case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
> -               if (ret) {
> -                       uint32_t *max_compute_units = ret;
> -                       *max_compute_units = 
> rscreen->info.num_good_compute_units;
> -               }
> -               return sizeof(uint32_t);
> -
> -       case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
> -               if (ret) {
> -                       uint32_t *images_supported = ret;
> -                       *images_supported = 0;
> -               }
> -               return sizeof(uint32_t);
> -       case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
> -               break; /* unused */
> -       case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
> -               if (ret) {
> -                       uint32_t *subgroup_size = ret;
> -                       *subgroup_size = 64;
> -               }
> -               return sizeof(uint32_t);
> -       case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
> -               if (ret) {
> -                       uint64_t *max_variable_threads_per_block = ret;
> -                       if (ir_type == PIPE_SHADER_IR_TGSI)
> -                               *max_variable_threads_per_block = 
> SI_MAX_VARIABLE_THREADS_PER_BLOCK;
> -                       else
> -                               *max_variable_threads_per_block = 0;
> -               }
> -               return sizeof(uint64_t);
> -       }
> -
> -        fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
> -        return 0;
> -}
> -
> -static uint64_t r600_get_timestamp(struct pipe_screen *screen)
> -{
> -       struct r600_common_screen *rscreen = (struct 
> r600_common_screen*)screen;
> -
> -       return 1000000 * rscreen->ws->query_value(rscreen->ws, 
> RADEON_TIMESTAMP) /
> -                       rscreen->info.clock_crystal_freq;
> -}
> -
> -static void r600_query_memory_info(struct pipe_screen *screen,
> -                                  struct pipe_memory_info *info)
> -{
> -       struct r600_common_screen *rscreen = (struct 
> r600_common_screen*)screen;
> -       struct radeon_winsys *ws = rscreen->ws;
> -       unsigned vram_usage, gtt_usage;
> -
> -       info->total_device_memory = rscreen->info.vram_size / 1024;
> -       info->total_staging_memory = rscreen->info.gart_size / 1024;
> -
> -       /* The real TTM memory usage is somewhat random, because:
> -        *
> -        * 1) TTM delays freeing memory, because it can only free it after
> -        *    fences expire.
> -        *
> -        * 2) The memory usage can be really low if big VRAM evictions are
> -        *    taking place, but the real usage is well above the size of VRAM.
> -        *
> -        * Instead, return statistics of this process.
> -        */
> -       vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
> -       gtt_usage =  ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
> -
> -       info->avail_device_memory =
> -               vram_usage <= info->total_device_memory ?
> -                               info->total_device_memory - vram_usage : 0;
> -       info->avail_staging_memory =
> -               gtt_usage <= info->total_staging_memory ?
> -                               info->total_staging_memory - gtt_usage : 0;
> -
> -       info->device_memory_evicted =
> -               ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
> -
> -       if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4)
> -               info->nr_device_memory_evictions =
> -                       ws->query_value(ws, RADEON_NUM_EVICTIONS);
> -       else
> -               /* Just return the number of evicted 64KB pages. */
> -               info->nr_device_memory_evictions = 
> info->device_memory_evicted / 64;
> -}
> -
>  struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
>                                                 const struct pipe_resource 
> *templ)
>  {
>         if (templ->target == PIPE_BUFFER) {
>                 return si_buffer_create(screen, templ, 256);
>         } else {
>                 return si_texture_create(screen, templ);
>         }
>  }
>
>  bool si_common_screen_init(struct r600_common_screen *rscreen,
>                            struct radeon_winsys *ws)
>  {
> -       char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] 
> = {};
> -       struct utsname uname_data;
> -       const char *chip_name;
> -
> -       ws->query_info(ws, &rscreen->info);
> -       rscreen->ws = ws;
> -
> -       if ((chip_name = r600_get_marketing_name(ws)))
> -               snprintf(family_name, sizeof(family_name), "%s / ",
> -                        r600_get_family_name(rscreen) + 4);
> -       else
> -               chip_name = r600_get_family_name(rscreen);
> -
> -       if (uname(&uname_data) == 0)
> -               snprintf(kernel_version, sizeof(kernel_version),
> -                        " / %s", uname_data.release);
> -
> -       if (HAVE_LLVM > 0) {
> -               snprintf(llvm_string, sizeof(llvm_string),
> -                        ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
> -                        HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
> -       }
> -
> -       snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
> -                "%s (%sDRM %i.%i.%i%s%s)",
> -                chip_name, family_name, rscreen->info.drm_major,
> -                rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
> -                kernel_version, llvm_string);
> -
> -       rscreen->b.get_name = r600_get_name;
> -       rscreen->b.get_vendor = r600_get_vendor;
> -       rscreen->b.get_device_vendor = r600_get_device_vendor;
> -       rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
> -       rscreen->b.get_compute_param = r600_get_compute_param;
> -       rscreen->b.get_paramf = r600_get_paramf;
> -       rscreen->b.get_timestamp = r600_get_timestamp;
>         rscreen->b.resource_destroy = u_resource_destroy_vtbl;
>         rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
> -       rscreen->b.query_memory_info = r600_query_memory_info;
>
>         if (rscreen->info.has_hw_decode) {
>                 rscreen->b.get_video_param = si_vid_get_video_param;
>                 rscreen->b.is_video_format_supported = 
> si_vid_is_format_supported;
>         } else {
>                 rscreen->b.get_video_param = r600_get_video_param;
>                 rscreen->b.is_video_format_supported = 
> vl_video_buffer_is_format_supported;
>         }
>
>         si_init_screen_texture_functions(rscreen);
>         si_init_screen_query_functions(rscreen);
>
>         rscreen->family = rscreen->info.family;
>         rscreen->chip_class = rscreen->info.chip_class;
>         rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", 
> common_debug_options, 0);
>         rscreen->has_rbplus = false;
>         rscreen->rbplus_allowed = false;
>
> -       r600_disk_cache_create(rscreen);
> -
>         slab_create_parent(&rscreen->pool_transfers, sizeof(struct 
> r600_transfer), 64);
>
>         rscreen->force_aniso = MIN2(16, 
> debug_get_num_option("R600_TEX_ANISO", -1));
>         if (rscreen->force_aniso >= 0) {
>                 printf("radeon: Forcing anisotropy filter to %ix\n",
>                        /* round down to a power of two */
>                        1 << util_logbase2(rscreen->force_aniso));
>         }
>
>         (void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
>         (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
>
> -       if (rscreen->debug_flags & DBG(INFO)) {
> -               printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
> -                      rscreen->info.pci_domain, rscreen->info.pci_bus,
> -                      rscreen->info.pci_dev, rscreen->info.pci_func);
> -               printf("pci_id = 0x%x\n", rscreen->info.pci_id);
> -               printf("family = %i (%s)\n", rscreen->info.family,
> -                      r600_get_family_name(rscreen));
> -               printf("chip_class = %i\n", rscreen->info.chip_class);
> -               printf("pte_fragment_size = %u\n", 
> rscreen->info.pte_fragment_size);
> -               printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
> -               printf("gart_size = %i MB\n", 
> (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
> -               printf("vram_size = %i MB\n", 
> (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
> -               printf("vram_vis_size = %i MB\n", 
> (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
> -               printf("max_alloc_size = %i MB\n",
> -                      (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 
> 1024*1024));
> -               printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
> -               printf("has_dedicated_vram = %u\n", 
> rscreen->info.has_dedicated_vram);
> -               printf("has_virtual_memory = %i\n", 
> rscreen->info.has_virtual_memory);
> -               printf("gfx_ib_pad_with_type2 = %i\n", 
> rscreen->info.gfx_ib_pad_with_type2);
> -               printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
> -               printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
> -               printf("num_compute_rings = %u\n", 
> rscreen->info.num_compute_rings);
> -               printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
> -               printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
> -               printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
> -               printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature);
> -               printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
> -               printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature);
> -               printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
> -               printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature);
> -               printf("vce_harvest_config = %i\n", 
> rscreen->info.vce_harvest_config);
> -               printf("clock_crystal_freq = %i\n", 
> rscreen->info.clock_crystal_freq);
> -               printf("tcc_cache_line_size = %u\n", 
> rscreen->info.tcc_cache_line_size);
> -               printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
> -                      rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
> -               printf("has_userptr = %i\n", rscreen->info.has_userptr);
> -               printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
> -               printf("has_sync_file = %u\n", rscreen->info.has_sync_file);
> -
> -               printf("r600_max_quad_pipes = %i\n", 
> rscreen->info.r600_max_quad_pipes);
> -               printf("max_shader_clock = %i\n", 
> rscreen->info.max_shader_clock);
> -               printf("num_good_compute_units = %i\n", 
> rscreen->info.num_good_compute_units);
> -               printf("max_se = %i\n", rscreen->info.max_se);
> -               printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
> -
> -               printf("r600_gb_backend_map = %i\n", 
> rscreen->info.r600_gb_backend_map);
> -               printf("r600_gb_backend_map_valid = %i\n", 
> rscreen->info.r600_gb_backend_map_valid);
> -               printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
> -               printf("num_render_backends = %i\n", 
> rscreen->info.num_render_backends);
> -               printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
> -               printf("pipe_interleave_bytes = %i\n", 
> rscreen->info.pipe_interleave_bytes);
> -               printf("enabled_rb_mask = 0x%x\n", 
> rscreen->info.enabled_rb_mask);
> -               printf("max_alignment = %u\n", 
> (unsigned)rscreen->info.max_alignment);
> -       }
>         return true;
>  }
>
>  void si_destroy_common_screen(struct r600_common_screen *rscreen)
>  {
>         si_perfcounters_destroy(rscreen);
>         si_gpu_load_kill_thread(rscreen);
>
>         mtx_destroy(&rscreen->gpu_load_mutex);
>         mtx_destroy(&rscreen->aux_context_lock);
>         rscreen->aux_context->destroy(rscreen->aux_context);
>
>         slab_destroy_parent(&rscreen->pool_transfers);
>
> -       disk_cache_destroy(rscreen->disk_shader_cache);
>         rscreen->ws->destroy(rscreen->ws);
>         FREE(rscreen);
>  }
>
>  bool si_can_dump_shader(struct r600_common_screen *rscreen,
>                         unsigned processor)
>  {
>         return rscreen->debug_flags & (1 << processor);
>  }
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index adfcc7c8a70..4b80d188fba 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -387,22 +387,20 @@ struct r600_memory_object {
>  struct r600_common_screen {
>         struct pipe_screen              b;
>         struct radeon_winsys            *ws;
>         enum radeon_family              family;
>         enum chip_class                 chip_class;
>         struct radeon_info              info;
>         uint64_t                        debug_flags;
>         bool                            has_rbplus;     /* if RB+ registers 
> exist */
>         bool                            rbplus_allowed; /* if RB+ is allowed 
> */
>
> -       struct disk_cache               *disk_shader_cache;
> -
>         struct slab_parent_pool         pool_transfers;
>
>         /* Texture filter settings. */
>         int                             force_aniso; /* -1 = disabled */
>
>         /* Auxiliary context. Mainly used to initialize resources.
>          * It must be locked prior to using and flushed before unlocking. */
>         struct pipe_context             *aux_context;
>         mtx_t                           aux_context_lock;
>
> @@ -415,22 +413,20 @@ struct r600_common_screen {
>          */
>         unsigned                        num_shaders_created;
>         unsigned                        num_shader_cache_hits;
>
>         /* GPU load thread. */
>         mtx_t                           gpu_load_mutex;
>         thrd_t                          gpu_load_thread;
>         union r600_mmio_counters        mmio_counters;
>         volatile unsigned               gpu_load_stop_thread; /* bool */
>
> -       char                            renderer_string[100];
> -
>         /* Performance counters. */
>         struct r600_perfcounters        *perfcounters;
>
>         /* If pipe_screen wants to recompute and re-emit the framebuffer,
>          * sampler, and image states of all contexts, it should atomically
>          * increment this.
>          *
>          * Each context will compare this with its own last known value of
>          * the counter before drawing and re-emit the states accordingly.
>          */
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index b3d8ae508bd..b38c55619f7 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -31,20 +31,22 @@
>  #include "util/u_log.h"
>  #include "util/u_memory.h"
>  #include "util/u_suballoc.h"
>  #include "util/u_tests.h"
>  #include "util/xmlconfig.h"
>  #include "vl/vl_decoder.h"
>  #include "../ddebug/dd_util.h"
>
>  #include "compiler/nir/nir.h"
>
> +#include <sys/utsname.h>
> +
>  /*
>   * pipe_context
>   */
>  static void si_destroy_context(struct pipe_context *context)
>  {
>         struct si_context *sctx = (struct si_context *)context;
>         int i;
>
>         /* Unreference the framebuffer normally to disable related logic
>          * properly.
> @@ -394,20 +396,306 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen,
>          * implementation for fence_server_sync is incomplete. */
>         return threaded_context_create(ctx, &sscreen->b.pool_transfers,
>                                        si_replace_buffer_storage,
>                                        sscreen->b.info.drm_major >= 3 ? 
> si_create_fence : NULL,
>                                        &((struct si_context*)ctx)->b.tc);
>  }
>
>  /*
>   * pipe_screen
>   */
> +static const char* si_get_vendor(struct pipe_screen* pscreen)
> +{
> +       return "X.Org";
> +}
> +
> +static const char* si_get_device_vendor(struct pipe_screen* pscreen)
> +{
> +       return "AMD";
> +}
> +
> +static const char *si_get_marketing_name(struct radeon_winsys *ws)
> +{
> +       if (!ws->get_chip_name)
> +               return NULL;
> +       return ws->get_chip_name(ws);
> +}
> +
> +static const char *si_get_family_name(const struct si_screen *screen)
> +{
> +       switch (screen->b.info.family) {
> +       case CHIP_TAHITI: return "AMD TAHITI";
> +       case CHIP_PITCAIRN: return "AMD PITCAIRN";
> +       case CHIP_VERDE: return "AMD CAPE VERDE";
> +       case CHIP_OLAND: return "AMD OLAND";
> +       case CHIP_HAINAN: return "AMD HAINAN";
> +       case CHIP_BONAIRE: return "AMD BONAIRE";
> +       case CHIP_KAVERI: return "AMD KAVERI";
> +       case CHIP_KABINI: return "AMD KABINI";
> +       case CHIP_HAWAII: return "AMD HAWAII";
> +       case CHIP_MULLINS: return "AMD MULLINS";
> +       case CHIP_TONGA: return "AMD TONGA";
> +       case CHIP_ICELAND: return "AMD ICELAND";
> +       case CHIP_CARRIZO: return "AMD CARRIZO";
> +       case CHIP_FIJI: return "AMD FIJI";
> +       case CHIP_POLARIS10: return "AMD POLARIS10";
> +       case CHIP_POLARIS11: return "AMD POLARIS11";
> +       case CHIP_POLARIS12: return "AMD POLARIS12";
> +       case CHIP_STONEY: return "AMD STONEY";
> +       case CHIP_VEGA10: return "AMD VEGA10";
> +       case CHIP_RAVEN: return "AMD RAVEN";
> +       default: return "AMD unknown";
> +       }
> +}
> +
> +static void si_disk_cache_create(struct si_screen *screen)
> +{
> +       /* Don't use the cache if shader dumping is enabled. */
> +       if (screen->b.debug_flags & DBG_ALL_SHADERS)
> +               return;
> +
> +       /* TODO: remove this once gallium supports a nir cache */
> +       if (screen->b.debug_flags & DBG(NIR))
> +               return;
> +
> +       uint32_t mesa_timestamp;
> +       if (disk_cache_get_function_timestamp(si_disk_cache_create,
> +                                             &mesa_timestamp)) {
> +               char *timestamp_str;
> +               int res = -1;
> +               uint32_t llvm_timestamp;
> +
> +               if 
> (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
> +                                                     &llvm_timestamp)) {
> +                       res = asprintf(&timestamp_str, "%u_%u",
> +                                      mesa_timestamp, llvm_timestamp);
> +               }
> +
> +               if (res != -1) {
> +                       /* These flags affect shader compilation. */
> +                       uint64_t shader_debug_flags =
> +                               screen->b.debug_flags &
> +                               (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
> +                                DBG(SI_SCHED) |
> +                                DBG(UNSAFE_MATH));
> +
> +                       screen->disk_shader_cache =
> +                               disk_cache_create(si_get_family_name(screen),
> +                                                 timestamp_str,
> +                                                 shader_debug_flags);
> +                       free(timestamp_str);
> +               }
> +       }
> +}
> +
> +static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen)
> +{
> +       struct si_screen *sscreen = (struct si_screen*)pscreen;
> +       return sscreen->disk_shader_cache;
> +}
> +
> +static const char* si_get_name(struct pipe_screen* pscreen)
> +{
> +       struct si_screen *sscreen = (struct si_screen*)pscreen;
> +
> +       return sscreen->renderer_string;
> +}
> +
> +static float si_get_paramf(struct pipe_screen* pscreen,
> +                          enum pipe_capf param)
> +{
> +       switch (param) {
> +       case PIPE_CAPF_MAX_LINE_WIDTH:
> +       case PIPE_CAPF_MAX_LINE_WIDTH_AA:
> +       case PIPE_CAPF_MAX_POINT_WIDTH:
> +       case PIPE_CAPF_MAX_POINT_WIDTH_AA:
> +               return 8192.0f;
> +       case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
> +               return 16.0f;
> +       case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
> +               return 16.0f;
> +       case PIPE_CAPF_GUARD_BAND_LEFT:
> +       case PIPE_CAPF_GUARD_BAND_TOP:
> +       case PIPE_CAPF_GUARD_BAND_RIGHT:
> +       case PIPE_CAPF_GUARD_BAND_BOTTOM:
> +               return 0.0f;
> +       }
> +       return 0.0f;
> +}
> +
> +static unsigned get_max_threads_per_block(struct si_screen *screen,
> +                                         enum pipe_shader_ir ir_type)
> +{
> +       if (ir_type != PIPE_SHADER_IR_TGSI)
> +               return 256;
> +
> +       /* Only 16 waves per thread-group on gfx9. */
> +       if (screen->b.chip_class >= GFX9)
> +               return 1024;
> +
> +       /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
> +        * round number.
> +        */
> +       return 2048;
> +}
> +
> +static int si_get_compute_param(struct pipe_screen *screen,
> +        enum pipe_shader_ir ir_type,
> +        enum pipe_compute_cap param,
> +        void *ret)
> +{
> +       struct si_screen *sscreen = (struct si_screen *)screen;
> +
> +       switch (param) {
> +       case PIPE_COMPUTE_CAP_IR_TARGET: {
> +               const char *gpu;
> +               const char *triple;
> +
> +               if (HAVE_LLVM < 0x0400)
> +                       triple = "amdgcn--";
> +               else
> +                       triple = "amdgcn-mesa-mesa3d";
> +
> +               gpu = ac_get_llvm_processor_name(sscreen->b.family);
> +               if (ret) {
> +                       sprintf(ret, "%s-%s", gpu, triple);
> +               }
> +               /* +2 for dash and terminating NIL byte */
> +               return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
> +       }
> +       case PIPE_COMPUTE_CAP_GRID_DIMENSION:
> +               if (ret) {
> +                       uint64_t *grid_dimension = ret;
> +                       grid_dimension[0] = 3;
> +               }
> +               return 1 * sizeof(uint64_t);
> +
> +       case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
> +               if (ret) {
> +                       uint64_t *grid_size = ret;
> +                       grid_size[0] = 65535;
> +                       grid_size[1] = 65535;
> +                       grid_size[2] = 65535;
> +               }
> +               return 3 * sizeof(uint64_t) ;
> +
> +       case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
> +               if (ret) {
> +                       uint64_t *block_size = ret;
> +                       unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type);
> +                       block_size[0] = threads_per_block;
> +                       block_size[1] = threads_per_block;
> +                       block_size[2] = threads_per_block;
> +               }
> +               return 3 * sizeof(uint64_t);
> +
> +       case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
> +               if (ret) {
> +                       uint64_t *max_threads_per_block = ret;
> +                       *max_threads_per_block = get_max_threads_per_block(sscreen, ir_type);
> +               }
> +               return sizeof(uint64_t);
> +       case PIPE_COMPUTE_CAP_ADDRESS_BITS:
> +               if (ret) {
> +                       uint32_t *address_bits = ret;
> +                       address_bits[0] = 64;
> +               }
> +               return 1 * sizeof(uint32_t);
> +
> +       case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
> +               if (ret) {
> +                       uint64_t *max_global_size = ret;
> +                       uint64_t max_mem_alloc_size;
> +
> +                       si_get_compute_param(screen, ir_type,
> +                               PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
> +                               &max_mem_alloc_size);
> +
> +                       /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
> +                        * 1/4 of the MAX_GLOBAL_SIZE.  Since the
> +                        * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
> +                        * make sure we never report more than
> +                        * 4 * MAX_MEM_ALLOC_SIZE.
> +                        */
> +                       *max_global_size = MIN2(4 * max_mem_alloc_size,
> +                                               MAX2(sscreen->b.info.gart_size,
> +                                                    sscreen->b.info.vram_size));
> +               }
> +               return sizeof(uint64_t);
> +
> +       case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
> +               if (ret) {
> +                       uint64_t *max_local_size = ret;
> +                       /* Value reported by the closed source driver. */
> +                       *max_local_size = 32768;
> +               }
> +               return sizeof(uint64_t);
> +
> +       case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
> +               if (ret) {
> +                       uint64_t *max_input_size = ret;
> +                       /* Value reported by the closed source driver. */
> +                       *max_input_size = 1024;
> +               }
> +               return sizeof(uint64_t);
> +
> +       case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
> +               if (ret) {
> +                       uint64_t *max_mem_alloc_size = ret;
> +
> +                       *max_mem_alloc_size = sscreen->b.info.max_alloc_size;
> +               }
> +               return sizeof(uint64_t);
> +
> +       case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
> +               if (ret) {
> +                       uint32_t *max_clock_frequency = ret;
> +                       *max_clock_frequency = sscreen->b.info.max_shader_clock;
> +               }
> +               return sizeof(uint32_t);
> +
> +       case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
> +               if (ret) {
> +                       uint32_t *max_compute_units = ret;
> +                       *max_compute_units = sscreen->b.info.num_good_compute_units;
> +               }
> +               return sizeof(uint32_t);
> +
> +       case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
> +               if (ret) {
> +                       uint32_t *images_supported = ret;
> +                       *images_supported = 0;
> +               }
> +               return sizeof(uint32_t);
> +       case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
> +               break; /* unused */
> +       case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
> +               if (ret) {
> +                       uint32_t *subgroup_size = ret;
> +                       *subgroup_size = 64;
> +               }
> +               return sizeof(uint32_t);
> +       case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
> +               if (ret) {
> +                       uint64_t *max_variable_threads_per_block = ret;
> +                       if (ir_type == PIPE_SHADER_IR_TGSI)
> +                               *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
> +                       else
> +                               *max_variable_threads_per_block = 0;
> +               }
> +               return sizeof(uint64_t);
> +       }
> +
> +        fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
> +        return 0;
> +}
> +
>  static bool si_have_tgsi_compute(struct si_screen *sscreen)
>  {
>         /* Old kernels disallowed some register writes for SI
>          * that are used for indirect dispatches. */
>         return (sscreen->b.chip_class >= CIK ||
>                 sscreen->b.info.drm_major == 3 ||
>                 (sscreen->b.info.drm_major == 2 &&
>                  sscreen->b.info.drm_minor >= 45));
>  }
>
> @@ -823,20 +1111,69 @@ static const struct nir_shader_compiler_options nir_options = {
>
>  static const void *
>  si_get_compiler_options(struct pipe_screen *screen,
>                         enum pipe_shader_ir ir,
>                         enum pipe_shader_type shader)
>  {
>         assert(ir == PIPE_SHADER_IR_NIR);
>         return &nir_options;
>  }
>
> +static uint64_t si_get_timestamp(struct pipe_screen *screen)
> +{
> +       struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
> +
> +       return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
> +                       rscreen->info.clock_crystal_freq;
> +}
> +
> +static void si_query_memory_info(struct pipe_screen *screen,
> +                                struct pipe_memory_info *info)
> +{
> +       struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
> +       struct radeon_winsys *ws = rscreen->ws;
> +       unsigned vram_usage, gtt_usage;
> +
> +       info->total_device_memory = rscreen->info.vram_size / 1024;
> +       info->total_staging_memory = rscreen->info.gart_size / 1024;
> +
> +       /* The real TTM memory usage is somewhat random, because:
> +        *
> +        * 1) TTM delays freeing memory, because it can only free it after
> +        *    fences expire.
> +        *
> +        * 2) The memory usage can be really low if big VRAM evictions are
> +        *    taking place, but the real usage is well above the size of VRAM.
> +        *
> +        * Instead, return statistics of this process.
> +        */
> +       vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
> +       gtt_usage =  ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
> +
> +       info->avail_device_memory =
> +               vram_usage <= info->total_device_memory ?
> +                               info->total_device_memory - vram_usage : 0;
> +       info->avail_staging_memory =
> +               gtt_usage <= info->total_staging_memory ?
> +                               info->total_staging_memory - gtt_usage : 0;
> +
> +       info->device_memory_evicted =
> +               ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
> +
> +       if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4)
> +               info->nr_device_memory_evictions =
> +                       ws->query_value(ws, RADEON_NUM_EVICTIONS);
> +       else
> +               /* Just return the number of evicted 64KB pages. */
> +               info->nr_device_memory_evictions = info->device_memory_evicted / 64;
> +}
> +
>  static void si_destroy_screen(struct pipe_screen* pscreen)
>  {
>         struct si_screen *sscreen = (struct si_screen *)pscreen;
>         struct si_shader_part *parts[] = {
>                 sscreen->vs_prologs,
>                 sscreen->tcs_epilogs,
>                 sscreen->gs_prologs,
>                 sscreen->ps_prologs,
>                 sscreen->ps_epilogs
>         };
> @@ -861,20 +1198,21 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
>                 while (parts[i]) {
>                         struct si_shader_part *part = parts[i];
>
>                         parts[i] = part->next;
>                         si_radeon_shader_binary_clean(&part->binary);
>                         FREE(part);
>                 }
>         }
>         mtx_destroy(&sscreen->shader_parts_mutex);
>         si_destroy_shader_cache(sscreen);
> +       disk_cache_destroy(sscreen->disk_shader_cache);
>         si_destroy_common_screen(&sscreen->b);
>  }
>
>  static bool si_init_gs_info(struct si_screen *sscreen)
>  {
>         /* gs_table_depth is not used by GFX9 */
>         if (sscreen->b.chip_class >= GFX9)
>                 return true;
>
>         switch (sscreen->b.family) {
> @@ -977,34 +1315,71 @@ static void radeonsi_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
>  {
>         struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen;
>
>         ac_compute_device_uuid(&rscreen->info, uuid, PIPE_UUID_SIZE);
>  }
>
>  struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
>                                            const struct pipe_screen_config *config)
>  {
>         struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
> +       char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
>         unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i;
> +       struct utsname uname_data;
> +       const char *chip_name;
>
>         if (!sscreen) {
>                 return NULL;
>         }
>
> +
> +       ws->query_info(ws, &sscreen->b.info);
> +       sscreen->b.ws = ws;
> +
> +       if ((chip_name = si_get_marketing_name(ws)))
> +               snprintf(family_name, sizeof(family_name), "%s / ",
> +                        si_get_family_name(sscreen) + 4);
> +       else
> +               chip_name = si_get_family_name(sscreen);
> +
> +       if (uname(&uname_data) == 0)
> +               snprintf(kernel_version, sizeof(kernel_version),
> +                        " / %s", uname_data.release);
> +
> +       if (HAVE_LLVM > 0) {
> +               snprintf(llvm_string, sizeof(llvm_string),
> +                        ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
> +                        HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
> +       }
> +
> +       snprintf(sscreen->renderer_string, sizeof(sscreen->renderer_string),
> +                "%s (%sDRM %i.%i.%i%s%s)",
> +                chip_name, family_name, sscreen->b.info.drm_major,
> +                sscreen->b.info.drm_minor, sscreen->b.info.drm_patchlevel,
> +                kernel_version, llvm_string);
> +
>         /* Set functions first. */
>         sscreen->b.b.context_create = si_pipe_create_context;
>         sscreen->b.b.destroy = si_destroy_screen;
> +       sscreen->b.b.get_name = si_get_name;
> +       sscreen->b.b.get_vendor = si_get_vendor;
> +       sscreen->b.b.get_device_vendor = si_get_device_vendor;
> +       sscreen->b.b.get_disk_shader_cache = si_get_disk_shader_cache;
> +       sscreen->b.b.get_compute_param = si_get_compute_param;
> +       sscreen->b.b.get_paramf = si_get_paramf;
>         sscreen->b.b.get_param = si_get_param;
>         sscreen->b.b.get_shader_param = si_get_shader_param;
>         sscreen->b.b.get_compiler_options = si_get_compiler_options;
>         sscreen->b.b.get_device_uuid = radeonsi_get_device_uuid;
>         sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid;
> +       sscreen->b.b.get_timestamp = si_get_timestamp;
> +       sscreen->b.b.query_memory_info = si_query_memory_info;
>         sscreen->b.b.resource_create = si_resource_create_common;
>
>         si_init_screen_fence_functions(sscreen);
>         si_init_screen_state_functions(sscreen);
>
>         /* Set these flags in debug_flags early, so that the shader cache takes
>          * them into account.
>          */
>         if (driQueryOptionb(config->options,
>                             "glsl_correct_derivatives_after_discard"))
> @@ -1012,20 +1387,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
>         if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
>                 sscreen->b.debug_flags |= DBG(SI_SCHED);
>
>         if (!si_common_screen_init(&sscreen->b, ws) ||
>             !si_init_gs_info(sscreen) ||
>             !si_init_shader_cache(sscreen)) {
>                 FREE(sscreen);
>                 return NULL;
>         }
>
> +       si_disk_cache_create(sscreen);
> +
>         /* Only enable as many threads as we have target machines, but at most
>          * the number of CPUs - 1 if there is more than one.
>          */
>         num_threads = sysconf(_SC_NPROCESSORS_ONLN);
>         num_threads = MAX2(1, num_threads - 1);
>         num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm));
>         num_compiler_threads_lowprio =
>                 MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority));
>
>         if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
> @@ -1144,20 +1521,75 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
>                 sscreen->b.debug_flags |= DBG_ALL_SHADERS;
>
>         for (i = 0; i < num_compiler_threads; i++)
>                 sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
>         for (i = 0; i < num_compiler_threads_lowprio; i++)
>                 sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen);
>
>         /* Create the auxiliary context. This must be done last. */
>         sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0);
>
> +       if (sscreen->b.debug_flags & DBG(INFO)) {
> +               printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
> +                      sscreen->b.info.pci_domain, sscreen->b.info.pci_bus,
> +                      sscreen->b.info.pci_dev, sscreen->b.info.pci_func);
> +               printf("pci_id = 0x%x\n", sscreen->b.info.pci_id);
> +               printf("family = %i (%s)\n", sscreen->b.info.family,
> +                      si_get_family_name(sscreen));
> +               printf("chip_class = %i\n", sscreen->b.info.chip_class);
> +               printf("pte_fragment_size = %u\n", sscreen->b.info.pte_fragment_size);
> +               printf("gart_page_size = %u\n", sscreen->b.info.gart_page_size);
> +               printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(sscreen->b.info.gart_size, 1024*1024));
> +               printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(sscreen->b.info.vram_size, 1024*1024));
> +               printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(sscreen->b.info.vram_vis_size, 1024*1024));
> +               printf("max_alloc_size = %i MB\n",
> +                      (int)DIV_ROUND_UP(sscreen->b.info.max_alloc_size, 1024*1024));
> +               printf("min_alloc_size = %u\n", sscreen->b.info.min_alloc_size);
> +               printf("has_dedicated_vram = %u\n", sscreen->b.info.has_dedicated_vram);
> +               printf("has_virtual_memory = %i\n", sscreen->b.info.has_virtual_memory);
> +               printf("gfx_ib_pad_with_type2 = %i\n", sscreen->b.info.gfx_ib_pad_with_type2);
> +               printf("has_hw_decode = %u\n", sscreen->b.info.has_hw_decode);
> +               printf("num_sdma_rings = %i\n", sscreen->b.info.num_sdma_rings);
> +               printf("num_compute_rings = %u\n", sscreen->b.info.num_compute_rings);
> +               printf("uvd_fw_version = %u\n", sscreen->b.info.uvd_fw_version);
> +               printf("vce_fw_version = %u\n", sscreen->b.info.vce_fw_version);
> +               printf("me_fw_version = %i\n", sscreen->b.info.me_fw_version);
> +               printf("me_fw_feature = %i\n", sscreen->b.info.me_fw_feature);
> +               printf("pfp_fw_version = %i\n", sscreen->b.info.pfp_fw_version);
> +               printf("pfp_fw_feature = %i\n", sscreen->b.info.pfp_fw_feature);
> +               printf("ce_fw_version = %i\n", sscreen->b.info.ce_fw_version);
> +               printf("ce_fw_feature = %i\n", sscreen->b.info.ce_fw_feature);
> +               printf("vce_harvest_config = %i\n", sscreen->b.info.vce_harvest_config);
> +               printf("clock_crystal_freq = %i\n", sscreen->b.info.clock_crystal_freq);
> +               printf("tcc_cache_line_size = %u\n", sscreen->b.info.tcc_cache_line_size);
> +               printf("drm = %i.%i.%i\n", sscreen->b.info.drm_major,
> +                      sscreen->b.info.drm_minor, sscreen->b.info.drm_patchlevel);
> +               printf("has_userptr = %i\n", sscreen->b.info.has_userptr);
> +               printf("has_syncobj = %u\n", sscreen->b.info.has_syncobj);
> +               printf("has_sync_file = %u\n", sscreen->b.info.has_sync_file);
> +
> +               printf("r600_max_quad_pipes = %i\n", sscreen->b.info.r600_max_quad_pipes);
> +               printf("max_shader_clock = %i\n", sscreen->b.info.max_shader_clock);
> +               printf("num_good_compute_units = %i\n", sscreen->b.info.num_good_compute_units);
> +               printf("max_se = %i\n", sscreen->b.info.max_se);
> +               printf("max_sh_per_se = %i\n", sscreen->b.info.max_sh_per_se);
> +
> +               printf("r600_gb_backend_map = %i\n", sscreen->b.info.r600_gb_backend_map);
> +               printf("r600_gb_backend_map_valid = %i\n", sscreen->b.info.r600_gb_backend_map_valid);
> +               printf("r600_num_banks = %i\n", sscreen->b.info.r600_num_banks);
> +               printf("num_render_backends = %i\n", sscreen->b.info.num_render_backends);
> +               printf("num_tile_pipes = %i\n", sscreen->b.info.num_tile_pipes);
> +               printf("pipe_interleave_bytes = %i\n", sscreen->b.info.pipe_interleave_bytes);
> +               printf("enabled_rb_mask = 0x%x\n", sscreen->b.info.enabled_rb_mask);
> +               printf("max_alignment = %u\n", (unsigned)sscreen->b.info.max_alignment);
> +       }
> +
>         if (sscreen->b.debug_flags & DBG(TEST_DMA))
>                 si_test_dma(&sscreen->b);
>
>         if (sscreen->b.debug_flags & (DBG(TEST_VMFAULT_CP) |
>                                       DBG(TEST_VMFAULT_SDMA) |
>                                       DBG(TEST_VMFAULT_SHADER)))
>                 si_test_vmfault(sscreen);
>
>         return &sscreen->b.b;
>  }
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 751441df1bc..a66f9da8658 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -99,20 +99,22 @@ struct si_screen {
>         bool                            has_msaa_sample_loc_bug;
>         bool                            has_ls_vgpr_init_bug;
>         bool                            dpbb_allowed;
>         bool                            dfsm_allowed;
>         bool                            llvm_has_working_vgpr_indexing;
>
>         /* Whether shaders are monolithic (1-part) or separate (3-part). */
>         bool                            use_monolithic_shaders;
>         bool                            record_llvm_ir;
>
> +       struct disk_cache               *disk_shader_cache;
> +
>         mtx_t                   shader_parts_mutex;
>         struct si_shader_part           *vs_prologs;
>         struct si_shader_part           *tcs_epilogs;
>         struct si_shader_part           *gs_prologs;
>         struct si_shader_part           *ps_prologs;
>         struct si_shader_part           *ps_epilogs;
>
>         /* Shader cache in memory.
>          *
>          * Design & limitations:
> @@ -132,20 +134,22 @@ struct si_screen {
>         struct util_queue               shader_compiler_queue;
>         /* Use at most 3 normal compiler threads on quadcore and better.
>          * Hyperthreaded CPUs report the number of threads, but we want
>          * the number of cores. */
>         LLVMTargetMachineRef            tm[3]; /* used by the queue only */
>
>         struct util_queue               shader_compiler_queue_low_priority;
>         /* Use at most 2 low priority threads on quadcore and better.
>          * We want to minimize the impact on multithreaded Mesa. */
>         LLVMTargetMachineRef            tm_low_priority[2]; /* at most 2 threads */
> +
> +       char                            renderer_string[100];
>  };
>
>  struct si_blend_color {
>         struct r600_atom                atom;
>         struct pipe_blend_color         state;
>         bool                            any_nonzeros;
>  };
>
>  struct si_sampler_view {
>         struct pipe_sampler_view        base;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 3edc340f01f..e1c70aaea26 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -199,61 +199,61 @@ static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
>         hw_binary = si_get_shader_binary(shader);
>         if (!hw_binary)
>                 return false;
>
>         if (_mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary,
>                                     hw_binary) == NULL) {
>                 FREE(hw_binary);
>                 return false;
>         }
>
> -       if (sscreen->b.disk_shader_cache && insert_into_disk_cache) {
> -               disk_cache_compute_key(sscreen->b.disk_shader_cache, tgsi_binary,
> +       if (sscreen->disk_shader_cache && insert_into_disk_cache) {
> +               disk_cache_compute_key(sscreen->disk_shader_cache, tgsi_binary,
>                                        *((uint32_t *)tgsi_binary), key);
> -               disk_cache_put(sscreen->b.disk_shader_cache, key, hw_binary,
> +               disk_cache_put(sscreen->disk_shader_cache, key, hw_binary,
>                                *((uint32_t *) hw_binary), NULL);
>         }
>
>         return true;
>  }
>
>  static bool si_shader_cache_load_shader(struct si_screen *sscreen,
>                                         void *tgsi_binary,
>                                         struct si_shader *shader)
>  {
>         struct hash_entry *entry =
>                 _mesa_hash_table_search(sscreen->shader_cache, tgsi_binary);
>         if (!entry) {
> -               if (sscreen->b.disk_shader_cache) {
> +               if (sscreen->disk_shader_cache) {
>                         unsigned char sha1[CACHE_KEY_SIZE];
>                         size_t tg_size = *((uint32_t *) tgsi_binary);
>
> -                       disk_cache_compute_key(sscreen->b.disk_shader_cache,
> +                       disk_cache_compute_key(sscreen->disk_shader_cache,
>                                                tgsi_binary, tg_size, sha1);
>
>                         size_t binary_size;
>                         uint8_t *buffer =
> -                               disk_cache_get(sscreen->b.disk_shader_cache,
> +                               disk_cache_get(sscreen->disk_shader_cache,
>                                                sha1, &binary_size);
>                         if (!buffer)
>                                 return false;
>
>                         if (binary_size < sizeof(uint32_t) ||
>                             *((uint32_t*)buffer) != binary_size) {
>                                  /* Something has gone wrong discard the item
>                                   * from the cache and rebuild/link from
>                                   * source.
>                                   */
>                                 assert(!"Invalid radeonsi shader disk cache "
>                                        "item!");
>
> -                               disk_cache_remove(sscreen->b.disk_shader_cache,
> +                               disk_cache_remove(sscreen->disk_shader_cache,
>                                                   sha1);
>                                 free(buffer);
>
>                                 return false;
>                         }
>
>                         if (!si_load_shader_binary(shader, buffer)) {
>                                 free(buffer);
>                                 return false;
>                         }
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: move caps, vendor/device name, and disk shader cache to radeonsi folder

Reply via email to