On 08.01.2015 06:03, Tom Stellard wrote:
> Instead of passing a pointer to the scratch buffer via user sgprs, we
> now patch the shader with the buffer address using reloc information
> from the LLVM generated ELF.

[...]

> @@ -174,6 +183,35 @@ static unsigned compute_num_waves_for_scratch(
>       return scratch_waves;
>  }
>  
> +static void apply_scratch_relocs(const struct si_screen *sscreen,
> +                     const struct radeon_shader_binary *binary,
> +                     struct si_shader *shader, uint64_t scratch_va) {
> +     unsigned i;
> +     char *ptr;
> +     uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
> +     uint32_t scratch_rsrc_dword1 =
> +             S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
> +             |  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
> +
> +     if (!binary->reloc_count) {
> +             return;
> +     }
> +
> +     ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
> +                                     PIPE_TRANSFER_READ_WRITE);
> +     for (i = 0 ; i < binary->reloc_count; i++) {
> +             const struct radeon_shader_reloc *reloc = &binary->relocs[i];
> +             if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
> +                     util_memcpy_cpu_to_le32(ptr + reloc->offset,
> +                             &scratch_rsrc_dword0, 4);
> +             } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
> +                     util_memcpy_cpu_to_le32(ptr + reloc->offset,
> +                             &scratch_rsrc_dword1, 4);
> +             }
> +     }
> +     sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
> +}

[...]

> @@ -273,10 +315,6 @@ static void si_launch_grid(
>  
>       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
>       si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
> -     si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 8, scratch_buffer_va);
> -     si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 12,
> -             S_008F04_BASE_ADDRESS_HI(scratch_buffer_va >> 32)
> -             |  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64));
>  
>       si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
>       si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
> 

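Looks like removing these user SGPR writes will break with older LLVM,
which presumably doesn't emit the scratch relocations and still expects
the scratch buffer address in the user SGPRs.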


Patch 3 looks good to me. Haven't had time to look at patch 4 in detail.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to