Re: [Mesa-dev] [PATCH] nir/lower_tex: Add an assert() in nir_lower_txs_lod()

2019-06-19 Thread Jason Ekstrand
Thanks!

Reviewed-by: Jason Ekstrand 

On Wed, Jun 19, 2019 at 8:09 AM Boris Brezillon <
boris.brezil...@collabora.com> wrote:

> We don't expect the output of a TXS instruction to be wider than a
> vec3. Add an assert() to make sure this never happens.
>
> Suggested-by: Jason Ekstrand 
> Signed-off-by: Boris Brezillon 
> ---
>  src/compiler/nir/nir_lower_tex.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/compiler/nir/nir_lower_tex.c
> b/src/compiler/nir/nir_lower_tex.c
> index 8a36edfbc5e4..05b46b902521 100644
> --- a/src/compiler/nir/nir_lower_tex.c
> +++ b/src/compiler/nir/nir_lower_tex.c
> @@ -1011,6 +1011,7 @@ nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
> if (tex->is_array) {
>nir_ssa_def *comp[3];
>
> +  assert(dest_size <= ARRAY_SIZE(comp));
>for (unsigned i = 0; i < dest_size - 1; i++)
>   comp[i] = nir_channel(b, minified, i);
>
> --
> 2.20.1
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/8] radeonsi: don't set spi_ps_input_* for monolithic shaders

2019-06-19 Thread Marek Olšák
From: Marek Olšák 

The driver doesn't use these values and ac_rtld has assertions
expecting the value of 0.
---
 src/gallium/drivers/radeonsi/si_shader.c | 39 
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 54b29d0ae01..0489399b827 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6128,21 +6128,22 @@ static void si_get_ps_prolog_key(struct si_shader 
*shader,
 key->ps_prolog.states.bc_optimize_for_linear);
key->ps_prolog.ancillary_vgpr_index = shader->info.ancillary_vgpr_index;
 
if (info->colors_read) {
unsigned *color = shader->selector->color_attr_index;
 
if (shader->key.part.ps.prolog.color_two_side) {
/* BCOLORs are stored after the last input. */
key->ps_prolog.num_interp_inputs = info->num_inputs;
key->ps_prolog.face_vgpr_index = 
shader->info.face_vgpr_index;
-   shader->config.spi_ps_input_ena |= 
S_0286CC_FRONT_FACE_ENA(1);
+   if (separate_prolog)
+   shader->config.spi_ps_input_ena |= 
S_0286CC_FRONT_FACE_ENA(1);
}
 
for (unsigned i = 0; i < 2; i++) {
unsigned interp = info->input_interpolate[color[i]];
unsigned location = 
info->input_interpolate_loc[color[i]];
 
if (!(info->colors_read & (0xf << i*4)))
continue;
 
key->ps_prolog.color_attr_index[i] = color[i];
@@ -6159,66 +6160,78 @@ static void si_get_ps_prolog_key(struct si_shader 
*shader,
case TGSI_INTERPOLATE_COLOR:
/* Force the interpolation location for colors 
here. */
if 
(shader->key.part.ps.prolog.force_persp_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
if 
(shader->key.part.ps.prolog.force_persp_center_interp)
location = TGSI_INTERPOLATE_LOC_CENTER;
 
switch (location) {
case TGSI_INTERPOLATE_LOC_SAMPLE:

key->ps_prolog.color_interp_vgpr_index[i] = 0;
-   shader->config.spi_ps_input_ena |=
-   S_0286CC_PERSP_SAMPLE_ENA(1);
+   if (separate_prolog) {
+   shader->config.spi_ps_input_ena 
|=
+   
S_0286CC_PERSP_SAMPLE_ENA(1);
+   }
break;
case TGSI_INTERPOLATE_LOC_CENTER:

key->ps_prolog.color_interp_vgpr_index[i] = 2;
-   shader->config.spi_ps_input_ena |=
-   S_0286CC_PERSP_CENTER_ENA(1);
+   if (separate_prolog) {
+   shader->config.spi_ps_input_ena 
|=
+   
S_0286CC_PERSP_CENTER_ENA(1);
+   }
break;
case TGSI_INTERPOLATE_LOC_CENTROID:

key->ps_prolog.color_interp_vgpr_index[i] = 4;
-   shader->config.spi_ps_input_ena |=
-   S_0286CC_PERSP_CENTROID_ENA(1);
+   if (separate_prolog) {
+   shader->config.spi_ps_input_ena 
|=
+   
S_0286CC_PERSP_CENTROID_ENA(1);
+   }
break;
default:
assert(0);
}
break;
case TGSI_INTERPOLATE_LINEAR:
/* Force the interpolation location for colors 
here. */
if 
(shader->key.part.ps.prolog.force_linear_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
if 
(shader->key.part.ps.prolog.force_linear_center_interp)
location = TGSI_INTERPOLATE_LOC_CENTER;
 
/* The VGPR assignment for non-monolithic 

[Mesa-dev] [PATCH 2/8] amd/rtld: update the ELF representation of LDS symbols

2019-06-19 Thread Marek Olšák
From: Nicolai Hähnle 

The initial prototype used a processor-specific symbol type, but
feedback suggests that an approach using a processor-specific section
name that encodes the alignment, analogous to SHN_COMMON symbols, is
preferred.

This patch keeps both variants around for now to reduce problems
with LLVM compatibility as we switch branches around.

This also cleans up the error reporting in this function.
---
 src/amd/common/ac_rtld.c | 34 +++---
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c
index 57d6b0151b4..ebf64d91658 100644
--- a/src/amd/common/ac_rtld.c
+++ b/src/amd/common/ac_rtld.c
@@ -32,21 +32,25 @@
 
 #include "ac_binary.h"
 #include "ac_gpu_info.h"
 #include "util/u_dynarray.h"
 #include "util/u_math.h"
 
 // Old distributions may not have this enum constant
 #define MY_EM_AMDGPU 224
 
 #ifndef STT_AMDGPU_LDS
-#define STT_AMDGPU_LDS 13
+#define STT_AMDGPU_LDS 13 // this is deprecated -- remove
+#endif
+
+#ifndef SHN_AMDGPU_LDS
+#define SHN_AMDGPU_LDS 0xff00
 #endif
 
 #ifndef R_AMDGPU_NONE
 #define R_AMDGPU_NONE 0
 #define R_AMDGPU_ABS32_LO 1
 #define R_AMDGPU_ABS32_HI 2
 #define R_AMDGPU_ABS64 3
 #define R_AMDGPU_REL32 4
 #define R_AMDGPU_REL64 5
 #define R_AMDGPU_ABS32 6
@@ -169,47 +173,60 @@ static bool layout_symbols(struct ac_rtld_symbol 
*symbols, unsigned num_symbols,
  * Read LDS symbols from the given \p section of the ELF of \p part and append
  * them to the LDS symbols list.
  *
  * Shared LDS symbols are filtered out.
  */
 static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
 unsigned part_idx,
 Elf_Scn *section,
 uint32_t *lds_end_align)
 {
-#define report_elf_if(cond) \
+#define report_if(cond) \
do { \
if ((cond)) { \
report_errorf(#cond); \
return false; \
} \
} while (false)
+#define report_elf_if(cond) \
+   do { \
+   if ((cond)) { \
+   report_elf_errorf(#cond); \
+   return false; \
+   } \
+   } while (false)
 
struct ac_rtld_part *part = >parts[part_idx];
Elf64_Shdr *shdr = elf64_getshdr(section);
uint32_t strtabidx = shdr->sh_link;
Elf_Data *symbols_data = elf_getdata(section, NULL);
report_elf_if(!symbols_data);
 
const Elf64_Sym *symbol = symbols_data->d_buf;
size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
 
for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
-   if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS)
+   struct ac_rtld_symbol s = {};
+
+   if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
+   /* old-style LDS symbols from initial prototype -- 
remove eventually */
+   s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
+   } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
+   s.align = MIN2(symbol->st_value, 1u << 16);
+   report_if(!util_is_power_of_two_nonzero(s.align));
+   } else
continue;
 
-   report_elf_if(symbol->st_size > 1u << 29);
+   report_if(symbol->st_size > 1u << 29);
 
-   struct ac_rtld_symbol s = {};
s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
s.size = symbol->st_size;
-   s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
s.part_idx = part_idx;
 
if (!strcmp(s.name, "__lds_end")) {
report_elf_if(s.size != 0);
*lds_end_align = MAX2(*lds_end_align, s.align);
continue;
}
 
const struct ac_rtld_symbol *shared =
find_symbol(>lds_symbols, s.name, part_idx);
@@ -217,20 +234,21 @@ static bool read_private_lds_symbols(struct 
ac_rtld_binary *binary,
report_elf_if(s.align > shared->align);
report_elf_if(s.size > shared->size);
continue;
}
 
util_dynarray_append(>lds_symbols, struct 
ac_rtld_symbol, s);
}
 
return true;
 
+#undef report_if
 #undef report_elf_if
 }
 
 /**
  * Open a binary consisting of one or more shader parts.
  *
  * \param binary the uninitialized struct
  * \param i binary opening parameters
  */
 bool ac_rtld_open(struct ac_rtld_binary *binary,
@@ -515,21 +533,23 @@ bool ac_rtld_read_config(struct ac_rtld_binary *binary,
config->rsrc2 = c.rsrc2;
}
 
return true;
 }
 
 static bool resolve_symbol(const struct ac_rtld_upload_info *u,
   unsigned part_idx, 

[Mesa-dev] [PATCH 8/8] radeonsi: rename and re-document cache flush flags

2019-06-19 Thread Marek Olšák
From: Marek Olšák 

SMEM and VMEM caches are L0 on gfx10.
---
 src/gallium/drivers/radeonsi/si_compute.c |  2 +-
 .../drivers/radeonsi/si_compute_blit.c| 12 +++---
 src/gallium/drivers/radeonsi/si_descriptors.c |  2 +-
 src/gallium/drivers/radeonsi/si_gfx_cs.c  |  8 ++--
 src/gallium/drivers/radeonsi/si_pipe.c|  8 ++--
 src/gallium/drivers/radeonsi/si_pipe.h| 34 +
 src/gallium/drivers/radeonsi/si_state.c   | 14 +++
 src/gallium/drivers/radeonsi/si_state_draw.c  | 38 +--
 .../drivers/radeonsi/si_state_streamout.c |  6 +--
 .../drivers/radeonsi/si_test_dma_perf.c   |  6 +--
 10 files changed, 66 insertions(+), 64 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 7e5259b70a0..63c95ed2604 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -910,21 +910,21 @@ static void si_launch_grid(
/* Add buffer sizes for memory checking in need_cs_space. */
si_context_add_resource_size(sctx, >shader.bo->b.b);
/* TODO: add the scratch buffer */
 
if (info->indirect) {
si_context_add_resource_size(sctx, info->indirect);
 
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
if (sctx->chip_class <= GFX8 &&
si_resource(info->indirect)->TC_L2_dirty) {
-   sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+   sctx->flags |= SI_CONTEXT_WB_L2;
si_resource(info->indirect)->TC_L2_dirty = false;
}
}
 
si_need_gfx_cs_space(sctx);
 
if (sctx->bo_list_add_all_compute_resources)
si_compute_resources_add_all_to_bo_list(sctx);
 
if (!sctx->cs_shader_state.initialized) {
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c 
b/src/gallium/drivers/radeonsi/si_compute_blit.c
index 1cfdc9b62c6..4c5464ac118 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -44,23 +44,23 @@ static enum si_cache_policy get_cache_policy(struct 
si_context *sctx,
 
 unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
enum si_cache_policy cache_policy)
 {
switch (coher) {
default:
case SI_COHERENCY_NONE:
case SI_COHERENCY_CP:
return 0;
case SI_COHERENCY_SHADER:
-   return SI_CONTEXT_INV_SMEM_L1 |
-  SI_CONTEXT_INV_VMEM_L1 |
-  (cache_policy == L2_BYPASS ? SI_CONTEXT_INV_GLOBAL_L2 : 
0);
+   return SI_CONTEXT_INV_SCACHE |
+  SI_CONTEXT_INV_VCACHE |
+  (cache_policy == L2_BYPASS ? SI_CONTEXT_INV_L2 : 0);
case SI_COHERENCY_CB_META:
return SI_CONTEXT_FLUSH_AND_INV_CB;
}
 }
 
 static void si_compute_internal_begin(struct si_context *sctx)
 {
sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
sctx->render_cond_force_off = true;
@@ -165,21 +165,21 @@ static void si_compute_do_clear_or_copy(struct si_context 
*sctx,
 
SI_COMPUTE_CLEAR_DW_PER_THREAD,
 
shader_dst_stream_policy, false);
}
ctx->bind_compute_state(ctx, sctx->cs_clear_buffer);
}
 
ctx->launch_grid(ctx, );
 
enum si_cache_policy cache_policy = get_cache_policy(sctx, coher, size);
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
-  (cache_policy == L2_BYPASS ? 
SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0);
+  (cache_policy == L2_BYPASS ? SI_CONTEXT_WB_L2 : 0);
 
if (cache_policy != L2_BYPASS)
si_resource(dst)->TC_L2_dirty = true;
 
/* Restore states. */
ctx->bind_compute_state(ctx, saved_cs);
ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, 
saved_sb,
saved_writable_mask);
si_compute_internal_end(sctx);
 }
@@ -411,21 +411,21 @@ void si_compute_copy_image(struct si_context *sctx,
info.last_block[1] = height % 8;
info.block[2] = 1;
info.grid[0] = DIV_ROUND_UP(width, 8);
info.grid[1] = DIV_ROUND_UP(height, 8);
info.grid[2] = depth;
}
 
ctx->launch_grid(ctx, );
 
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
-  (sctx->chip_class <= GFX8 ? 
SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
+  (sctx->chip_class <= GFX8 ? SI_CONTEXT_WB_L2 : 0) |
   si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
ctx->bind_compute_state(ctx, saved_cs);
  

[Mesa-dev] [PATCH 6/8] radeonsi: flatten the switch for DPBB tunables

2019-06-19 Thread Marek Olšák
From: Marek Olšák 

---
 .../drivers/radeonsi/si_state_binning.c| 18 --
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c 
b/src/gallium/drivers/radeonsi/si_state_binning.c
index 6285ccc28c2..a6b1830b661 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -395,34 +395,24 @@ void si_emit_dpbb_state(struct si_context *sctx)
punchout_mode = V_028060_AUTO;
disable_start_of_prim = (cb_target_enabled_4bit &
 blend->blend_enable_4bit) != 0;
}
 
/* Tunable parameters. Also test with DFSM enabled/disabled. */
unsigned context_states_per_bin; /* allowed range: [0, 5] */
unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
 
-   switch (sctx->family) {
-   case CHIP_VEGA10:
-   case CHIP_VEGA12:
-   case CHIP_VEGA20:
-   case CHIP_RAVEN:
-   case CHIP_RAVEN2:
-   /* Tuned for Raven. Vega might need different values. */
-   context_states_per_bin = 5;
-   persistent_states_per_bin = 31;
-   fpovs_per_batch = 63;
-   break;
-   default:
-   assert(0);
-   }
+   /* Tuned for Raven. Vega might need different values. */
+   context_states_per_bin = 5;
+   persistent_states_per_bin = 31;
+   fpovs_per_batch = 63;
 
/* Emit registers. */
struct uvec2 bin_size_extend = {};
if (bin_size.x >= 32)
bin_size_extend.x = util_logbase2(bin_size.x) - 5;
if (bin_size.y >= 32)
bin_size_extend.y = util_logbase2(bin_size.y) - 5;
 
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
radeon_opt_set_context_reg(
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/8] radeonsi: set the calling convention for inlined function calls

2019-06-19 Thread Marek Olšák
From: Marek Olšák 

otherwise the behavior is undefined
---
 src/amd/common/ac_llvm_build.c | 8 
 src/amd/common/ac_llvm_build.h | 3 +++
 src/gallium/drivers/radeonsi/si_compute_prim_discard.c | 2 +-
 src/gallium/drivers/radeonsi/si_shader.c   | 2 +-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1e6247ad72e..cdd4c36f5da 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -4427,10 +4427,18 @@ ac_build_ddxy_interp(struct ac_llvm_context *ctx, 
LLVMValueRef interp_ij)
 
 LLVMValueRef
 ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
 {
LLVMValueRef result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
 ctx->i1, NULL, 0,
 AC_FUNC_ATTR_READNONE);
result = LLVMBuildNot(ctx->builder, result, "");
return LLVMBuildSExt(ctx->builder, result, ctx->i32, "");
 }
+
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
+  LLVMValueRef *args, unsigned num_args)
+{
+   LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, 
"");
+   LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func));
+   return ret;
+}
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index a1654d2b2c4..1928843c78c 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -713,20 +713,23 @@ ac_build_frexp_exp(struct ac_llvm_context *ctx, 
LLVMValueRef src0,
 LLVMValueRef
 ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
 
 LLVMValueRef
 ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
 
 LLVMValueRef
 ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
 
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
+  LLVMValueRef *args, unsigned num_args);
+
 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, 
LLVMAtomicRMWBinOp op,
 LLVMValueRef ptr, LLVMValueRef val,
 const char *sync_scope);
 
 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, 
LLVMValueRef ptr,
  LLVMValueRef cmp, LLVMValueRef val,
  const char *sync_scope);
 
 #ifdef __cplusplus
 }
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c 
b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index 0f2934243a1..28da7b92250 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -660,21 +660,21 @@ void si_build_prim_discard_compute_shader(struct 
si_shader_context *ctx)
index[1] = LLVMBuildSelect(builder, prim_is_odd, 
index0, index1, "");
}
}
 
/* Execute the vertex shader for each vertex to get vertex positions. */
LLVMValueRef pos[3][4];
for (unsigned i = 0; i < vertices_per_prim; i++) {
vs_params[param_vertex_id] = index[i];
vs_params[param_instance_id] = instance_id;
 
-   LLVMValueRef ret = LLVMBuildCall(builder, vs, vs_params, 
num_vs_params, "");
+   LLVMValueRef ret = ac_build_call(>ac, vs, vs_params, 
num_vs_params);
for (unsigned chan = 0; chan < 4; chan++)
pos[i][chan] = LLVMBuildExtractValue(builder, ret, 
chan, "");
}
 
/* Divide XYZ by W. */
for (unsigned i = 0; i < vertices_per_prim; i++) {
for (unsigned chan = 0; chan < 3; chan++)
pos[i][chan] = ac_build_fdiv(>ac, pos[i][chan], 
pos[i][3]);
}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index eb75e2a77a4..54b29d0ae01 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6584,21 +6584,21 @@ static void si_build_wrapper_function(struct 
si_shader_context *ctx,
}
} else {
arg = LLVMBuildBitCast(builder, arg, 
param_type, "");
}
}
 
in[param_idx] = arg;
out_idx += param_size;
}
 
-   ret = LLVMBuildCall(builder, parts[part], in, num_params, "");
+   ret = ac_build_call(>ac, parts[part], in, num_params);
 
if (is_merged_shader(ctx) &&
part + 1 == next_shader_first_part) {
lp_build_endif(_state);
 
/* The second half of 

[Mesa-dev] [PATCH 1/8] ac/surface: remove addrlib_family_rev_id

2019-06-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_gpu_info.c |   2 +
 src/amd/common/ac_gpu_info.h |   2 +
 src/amd/common/ac_surface.c  | 111 +--
 3 files changed, 7 insertions(+), 108 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 4de6882f15e..cd81c5757f3 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -328,20 +328,22 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
info->chip_class = GFX8;
else if (info->family >= CHIP_BONAIRE)
info->chip_class = GFX7;
else if (info->family >= CHIP_TAHITI)
info->chip_class = GFX6;
else {
fprintf(stderr, "amdgpu: Unknown family.\n");
return false;
}
 
+   info->family_id = amdinfo->family_id;
+   info->chip_external_rev = amdinfo->chip_external_rev;
info->marketing_name = amdgpu_get_marketing_name(dev);
info->is_pro_graphics = info->marketing_name &&
(!strcmp(info->marketing_name, "Pro") ||
 !strcmp(info->marketing_name, "PRO") ||
 !strcmp(info->marketing_name, "Frontier"));
 
/* Set which chips have dedicated VRAM. */
info->has_dedicated_vram =
!(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);
 
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 2c67cec3ed5..d296c7eb89f 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -46,20 +46,22 @@ struct radeon_info {
uint32_tpci_dev;
uint32_tpci_func;
 
/* Device info. */
const char  *name;
const char  *marketing_name;
boolis_pro_graphics;
uint32_tpci_id;
enum radeon_family  family;
enum chip_class chip_class;
+   uint32_tfamily_id;
+   uint32_tchip_external_rev;
uint32_tnum_compute_rings;
uint32_tnum_sdma_rings;
uint32_tclock_crystal_freq;
uint32_ttcc_cache_line_size;
 
/* There are 2 display DCC codepaths, because display expects unaligned 
DCC. */
/* Disable RB and pipe alignment to skip the retile blit. (1 RB chips 
only) */
booluse_display_dcc_unaligned;
/* Allocate both aligned and unaligned DCC and use the retile blit. */
booluse_display_dcc_with_retile_blit;
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 7b86cb1e1f2..b336655a913 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -42,127 +42,20 @@
 #include "addrlib/inc/addrinterface.h"
 
 #ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
 #define CIASICIDGFXENGINE_SOUTHERNISLAND 0x000A
 #endif
 
 #ifndef CIASICIDGFXENGINE_ARCTICISLAND
 #define CIASICIDGFXENGINE_ARCTICISLAND 0x000D
 #endif
 
-static unsigned get_first(unsigned x, unsigned y)
-{
-   return x;
-}
-
-static void addrlib_family_rev_id(enum radeon_family family,
- unsigned *addrlib_family,
- unsigned *addrlib_revid)
-{
-   switch (family) {
-   case CHIP_TAHITI:
-   *addrlib_family = FAMILY_SI;
-   *addrlib_revid = get_first(AMDGPU_TAHITI_RANGE);
-   break;
-   case CHIP_PITCAIRN:
-   *addrlib_family = FAMILY_SI;
-   *addrlib_revid = get_first(AMDGPU_PITCAIRN_RANGE);
-   break;
-   case CHIP_VERDE:
-   *addrlib_family = FAMILY_SI;
-   *addrlib_revid =  get_first(AMDGPU_CAPEVERDE_RANGE);
-   break;
-   case CHIP_OLAND:
-   *addrlib_family = FAMILY_SI;
-   *addrlib_revid = get_first(AMDGPU_OLAND_RANGE);
-   break;
-   case CHIP_HAINAN:
-   *addrlib_family = FAMILY_SI;
-   *addrlib_revid = get_first(AMDGPU_HAINAN_RANGE);
-   break;
-   case CHIP_BONAIRE:
-   *addrlib_family = FAMILY_CI;
-   *addrlib_revid = get_first(AMDGPU_BONAIRE_RANGE);
-   break;
-   case CHIP_KAVERI:
-   *addrlib_family = FAMILY_KV;
-   *addrlib_revid = get_first(AMDGPU_SPECTRE_RANGE);
-   break;
-   case CHIP_KABINI:
-   *addrlib_family = FAMILY_KV;
-   *addrlib_revid = get_first(AMDGPU_KALINDI_RANGE);
-   break;
-   case CHIP_HAWAII:
-   *addrlib_family = FAMILY_CI;
-   *addrlib_revid = get_first(AMDGPU_HAWAII_RANGE);
-   break;
-   case CHIP_TONGA:
-   *addrlib_family = 

[Mesa-dev] [PATCH 3/8] radeonsi: refactor si_update_vgt_shader_config

2019-06-19 Thread Marek Olšák
From: Nicolai Hähnle 

We'll have to extend this at some point, and using a bitfield union in
this way makes it easier to get the right index without excessive
branching.
---
 src/gallium/drivers/radeonsi/si_pipe.h| 23 ++-
 .../drivers/radeonsi/si_state_shaders.c   | 65 +++
 2 files changed, 60 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index d32feab52c2..368cb4e473d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -760,20 +760,41 @@ union si_vgt_param_key {
unsigned count_from_stream_output:1;
unsigned primitive_restart:1;
unsigned multi_instances_smaller_than_primgroup:1;
unsigned uses_instancing:1;
unsigned prim:4;
 #endif
} u;
uint32_t index;
 };
 
+#define SI_NUM_VGT_STAGES_KEY_BITS 2
+#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
+
+/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
+ * Some fields are set by state-change calls, most are set by draw_vbo.
+ */
+union si_vgt_stages_key {
+   struct {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+   unsigned tess:1;
+   unsigned gs:1;
+   unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
+#else /* PIPE_ARCH_BIG_ENDIAN */
+   unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
+   unsigned gs:1;
+   unsigned tess:1;
+#endif
+   } u;
+   uint32_t index;
+};
+
 struct si_texture_handle
 {
unsigneddesc_slot;
booldesc_dirty;
struct pipe_sampler_view*view;
struct si_sampler_state sstate;
 };
 
 struct si_image_handle
 {
@@ -914,21 +935,21 @@ struct si_context {
struct si_streamout streamout;
struct si_viewports viewports;
unsignednum_window_rectangles;
boolwindow_rectangles_include;
struct pipe_scissor_state   window_rectangles[4];
 
/* Precomputed states. */
struct si_pm4_state *init_config;
struct si_pm4_state *init_config_gs_rings;
boolinit_config_has_vgt_flush;
-   struct si_pm4_state *vgt_shader_config[4];
+   struct si_pm4_state 
*vgt_shader_config[SI_NUM_VGT_STAGES_STATES];
 
/* shaders */
struct si_shader_ctx_state  ps_shader;
struct si_shader_ctx_state  gs_shader;
struct si_shader_ctx_state  vs_shader;
struct si_shader_ctx_state  tcs_shader;
struct si_shader_ctx_state  tes_shader;
struct si_shader_ctx_state  cs_prim_discard_state;
struct si_cs_shader_state   cs_shader_state;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index fab2e255742..0e3a1c3a776 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -3311,74 +3311,83 @@ static void si_init_tess_factor_ring(struct si_context 
*sctx)
}
 
/* Flush the context to re-emit the init_config state.
 * This is done only once in a lifetime of a context.
 */
si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
sctx->initial_gfx_cs_size = 0; /* force flush */
si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
 }
 
-static void si_update_vgt_shader_config(struct si_context *sctx)
+static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen 
*screen,
+  union si_vgt_stages_key 
key)
 {
-   /* Calculate the index of the config.
-* 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
-   unsigned index = 2*!!sctx->tes_shader.cso + !!sctx->gs_shader.cso;
-   struct si_pm4_state **pm4 = >vgt_shader_config[index];
+   struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+   uint32_t stages = 0;
 
-   if (!*pm4) {
-   uint32_t stages = 0;
+   if (key.u.tess) {
+   stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
+ S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
 
-   *pm4 = CALLOC_STRUCT(si_pm4_state);
+   if (key.u.gs)
+   stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
+ S_028B54_GS_EN(1);
+   else
+   stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
+   } else if (key.u.gs) {
+   stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
+ S_028B54_GS_EN(1);
+   }
 
-   if (sctx->tes_shader.cso) {
-   stages 

[Mesa-dev] [Bug 110673] amdgpu hevc encoding problems: segment fault and contents of garbage

2019-06-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110673

zhoulei  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #8 from zhoulei  ---
Issues have been fixed with these two sets of patches:

https://lists.freedesktop.org/archives/mesa-dev/2019-May/219673.html

https://lists.freedesktop.org/archives/mesa-dev/2019-June/220450.html

Mark as RESOLVED.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] nir/loop_analyze: used nir_alu_src to track loop limit

2019-06-19 Thread Timothy Arceri



On 20/6/19 5:57 am, Jason Ekstrand wrote:
On Wed, Jun 19, 2019 at 3:09 AM Timothy Arceri wrote:


This helps reduce the amount of abstraction in this pass and allows
us to retain more information about the src such as any swizzles.
Retaining the swizzle information is required for a bugfix in the
following patch.

Fixes: 6772a17acc8e ("nir: Add a loop analysis pass")
---
  src/compiler/nir/nir_loop_analyze.c | 37 +++--
  1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c
b/src/compiler/nir/nir_loop_analyze.c
index e85a404da1b..57d2d94cad2 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -543,25 +543,26 @@ guess_loop_limit(loop_info_state *state,
nir_const_value *limit_val,
  }

  static bool
-try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value
*limit_val,
-                      nir_loop_terminator *terminator,
loop_info_state *state)
+try_find_limit_of_alu(nir_alu_src *limit, nir_const_value *limit_val,
+                      nir_loop_terminator *terminator)
  {
-   if(!is_var_alu(limit))
+   if(limit->src.ssa->parent_instr->type != nir_instr_type_alu)
        return false;

-   nir_alu_instr *limit_alu =
nir_instr_as_alu(limit->def->parent_instr);
+   nir_alu_instr *limit_alu =
nir_instr_as_alu(limit->src.ssa->parent_instr);

     if (limit_alu->op == nir_op_imin ||
         limit_alu->op == nir_op_fmin) {
-      limit = get_loop_var(limit_alu->src[0].src.ssa, state);
+      limit = _alu->src[0];

-      if (!is_var_constant(limit))
-         limit = get_loop_var(limit_alu->src[1].src.ssa, state);
+      if (limit->src.ssa->parent_instr->type !=
nir_instr_type_load_const)
+         limit = _alu->src[1];


This is still horribly broken w.r.t. swizzles because we're not tracking
the component as we make this jump or the jump above for [if]min.


On further inspection, I don't think this is a problem because the GLSL
rules say the loop condition must be a scalar boolean.





-      if (!is_var_constant(limit))
+      if (limit->src.ssa->parent_instr->type !=
nir_instr_type_load_const)
           return false;

-      *limit_val =
nir_instr_as_load_const(limit->def->parent_instr)->value[0];
+      *limit_val =
+   
  nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];


        terminator->exact_trip_count_unknown = true;

@@ -777,19 +778,19 @@
is_supported_terminator_condition(nir_alu_instr *alu)

  static bool
  get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable
**ind,
-                             nir_loop_variable **limit,
+                             nir_alu_src **limit,
                               loop_info_state *state)
  {
     bool limit_rhs = true;

     /* We assume that the limit is the "right" operand */
     *ind = get_loop_var(alu->src[0].src.ssa, state);
-   *limit = get_loop_var(alu->src[1].src.ssa, state);
+   *limit = >src[1];

     if ((*ind)->type != basic_induction) {
        /* We had it the wrong way, flip things around */
        *ind = get_loop_var(alu->src[1].src.ssa, state);
-      *limit = get_loop_var(alu->src[0].src.ssa, state);
+      *limit = >src[0];
        limit_rhs = false;
     }

@@ -799,7 +800,7 @@ get_induction_and_limit_vars(nir_alu_instr *alu,
nir_loop_variable **ind,
  static void
  try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
                                   nir_loop_variable **ind,
-                                 nir_loop_variable **limit,
+                                 nir_alu_src **limit,
                                   bool *limit_rhs,
                                   loop_info_state *state)
  {
@@ -848,7 +849,7 @@ try_find_trip_count_vars_in_iand(nir_alu_instr
**alu,

     /* Try the other iand src if needed */
     if (*ind == NULL || (*ind && (*ind)->type != basic_induction) ||
-       !is_var_constant(*limit)) {
+       (*limit)->src.ssa->parent_instr->type !=
nir_instr_type_load_const) {
        src = iand->src[1].src.ssa;
        if (src->parent_instr->type == nir_instr_type_alu) {
           nir_alu_instr *tmp_alu = nir_instr_as_alu(src->parent_instr);
@@ -891,7 +892,7 @@ find_trip_count(loop_info_state *state)

        bool limit_rhs;
        nir_loop_variable *basic_ind = NULL;
-      nir_loop_variable *limit;
+      nir_alu_src *limit;
        if (alu->op == nir_op_inot || alu->op == nir_op_ieq) {
           nir_alu_instr *new_alu = alu;
           try_find_trip_count_vars_in_iand(_alu, _ind,
,
@@ -931,13 +932,13 @@ 

Re: [Mesa-dev] [PATCH] llvmpipe: make remove_shader_variant static.

2019-06-19 Thread Caio Marcelo de Oliveira Filho
On Thu, Jun 20, 2019 at 06:47:25AM +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> this isn't used outside this file.
> ---
>  src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 +-
>  src/gallium/drivers/llvmpipe/lp_state_fs.h | 4 
>  2 files changed, 1 insertion(+), 5 deletions(-)

This patch is

`Reviewed-by: Caio Marcelo de Oliveira Filho `


Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] nir/loop_analyze: used nir_alu_src to track loop limit

2019-06-19 Thread Timothy Arceri



On 20/6/19 5:57 am, Jason Ekstrand wrote:
On Wed, Jun 19, 2019 at 3:09 AM Timothy Arceri > wrote:


This helps reduce the amount of abstraction in this pass and allows
us to retain more information about the src such as any swizzles.
Retaining the swizzle information is required for a bugfix in the
following patch.

Fixes: 6772a17acc8e ("nir: Add a loop analysis pass")
---
  src/compiler/nir/nir_loop_analyze.c | 37 +++--
  1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c
b/src/compiler/nir/nir_loop_analyze.c
index e85a404da1b..57d2d94cad2 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -543,25 +543,26 @@ guess_loop_limit(loop_info_state *state,
nir_const_value *limit_val,
  }

  static bool
-try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value
*limit_val,
-                      nir_loop_terminator *terminator,
loop_info_state *state)
+try_find_limit_of_alu(nir_alu_src *limit, nir_const_value *limit_val,
+                      nir_loop_terminator *terminator)
  {
-   if(!is_var_alu(limit))
+   if(limit->src.ssa->parent_instr->type != nir_instr_type_alu)
        return false;

-   nir_alu_instr *limit_alu =
nir_instr_as_alu(limit->def->parent_instr);
+   nir_alu_instr *limit_alu =
nir_instr_as_alu(limit->src.ssa->parent_instr);

     if (limit_alu->op == nir_op_imin ||
         limit_alu->op == nir_op_fmin) {
-      limit = get_loop_var(limit_alu->src[0].src.ssa, state);
+      limit = &limit_alu->src[0];

-      if (!is_var_constant(limit))
-         limit = get_loop_var(limit_alu->src[1].src.ssa, state);
+      if (limit->src.ssa->parent_instr->type !=
nir_instr_type_load_const)
+         limit = &limit_alu->src[1];


This is still horribly broken w.r.t swizzles because we're not tracking 
the component as we make this or the jump above for [if]min.


I think we should probably just do a check to make sure the limit and 
invariant are scalar if we don't already. The rest of the pass cannot 
handle vectors anyway swizzles or not, and I don't think we should 
bother handling it either.






-      if (!is_var_constant(limit))
+      if (limit->src.ssa->parent_instr->type !=
nir_instr_type_load_const)
           return false;

-      *limit_val =
nir_instr_as_load_const(limit->def->parent_instr)->value[0];
+      *limit_val =
+   
  nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];


        terminator->exact_trip_count_unknown = true;

@@ -777,19 +778,19 @@
is_supported_terminator_condition(nir_alu_instr *alu)

  static bool
  get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable
**ind,
-                             nir_loop_variable **limit,
+                             nir_alu_src **limit,
                               loop_info_state *state)
  {
     bool limit_rhs = true;

     /* We assume that the limit is the "right" operand */
     *ind = get_loop_var(alu->src[0].src.ssa, state);
-   *limit = get_loop_var(alu->src[1].src.ssa, state);
+   *limit = &alu->src[1];

     if ((*ind)->type != basic_induction) {
        /* We had it the wrong way, flip things around */
        *ind = get_loop_var(alu->src[1].src.ssa, state);
-      *limit = get_loop_var(alu->src[0].src.ssa, state);
+      *limit = &alu->src[0];
        limit_rhs = false;
     }

@@ -799,7 +800,7 @@ get_induction_and_limit_vars(nir_alu_instr *alu,
nir_loop_variable **ind,
  static void
  try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
                                   nir_loop_variable **ind,
-                                 nir_loop_variable **limit,
+                                 nir_alu_src **limit,
                                   bool *limit_rhs,
                                   loop_info_state *state)
  {
@@ -848,7 +849,7 @@ try_find_trip_count_vars_in_iand(nir_alu_instr
**alu,

     /* Try the other iand src if needed */
     if (*ind == NULL || (*ind && (*ind)->type != basic_induction) ||
-       !is_var_constant(*limit)) {
+       (*limit)->src.ssa->parent_instr->type !=
nir_instr_type_load_const) {
        src = iand->src[1].src.ssa;
        if (src->parent_instr->type == nir_instr_type_alu) {
           nir_alu_instr *tmp_alu = nir_instr_as_alu(src->parent_instr);
@@ -891,7 +892,7 @@ find_trip_count(loop_info_state *state)

        bool limit_rhs;
        nir_loop_variable *basic_ind = NULL;
-      nir_loop_variable *limit;
+      nir_alu_src *limit;
        if (alu->op == nir_op_inot || alu->op == nir_op_ieq) {
           nir_alu_instr *new_alu = alu;
 

Re: [Mesa-dev] [PATCH 1/2] nir/loop_analyze: used nir_alu_src to track loop limit

2019-06-19 Thread Timothy Arceri

On 19/6/19 11:55 pm, Brian Paul wrote:

On 06/19/2019 02:08 AM, Timothy Arceri wrote:

This helps reduce the amount of abstraction in this pass and allows
us to retain more information about the src such as any swizzles.
Retaining the swizzle information is required for a bugfix in the
following patch.

Fixes: 6772a17acc8e ("nir: Add a loop analysis pass")
---
  src/compiler/nir/nir_loop_analyze.c | 37 +++--
  1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c

index e85a404da1b..57d2d94cad2 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -543,25 +543,26 @@ guess_loop_limit(loop_info_state *state, 
nir_const_value *limit_val,

  }
  static bool
-try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value 
*limit_val,
-  nir_loop_terminator *terminator, 
loop_info_state *state)

+try_find_limit_of_alu(nir_alu_src *limit, nir_const_value *limit_val,
+  nir_loop_terminator *terminator)
  {
-   if(!is_var_alu(limit))
+   if(limit->src.ssa->parent_instr->type != nir_instr_type_alu)
    return false;
-   nir_alu_instr *limit_alu = 
nir_instr_as_alu(limit->def->parent_instr);
+   nir_alu_instr *limit_alu = 
nir_instr_as_alu(limit->src.ssa->parent_instr);

 if (limit_alu->op == nir_op_imin ||
 limit_alu->op == nir_op_fmin) {
-  limit = get_loop_var(limit_alu->src[0].src.ssa, state);
+  limit = &limit_alu->src[0];
-  if (!is_var_constant(limit))
- limit = get_loop_var(limit_alu->src[1].src.ssa, state);
+  if (limit->src.ssa->parent_instr->type != 
nir_instr_type_load_const)

+ limit = &limit_alu->src[1];
-  if (!is_var_constant(limit))
+  if (limit->src.ssa->parent_instr->type != 
nir_instr_type_load_const)

   return false;
-  *limit_val = 
nir_instr_as_load_const(limit->def->parent_instr)->value[0];

+  *limit_val =
+ 
nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];

    terminator->exact_trip_count_unknown = true;
@@ -777,19 +778,19 @@ is_supported_terminator_condition(nir_alu_instr 
*alu)

  static bool
  get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable 
**ind,

- nir_loop_variable **limit,
+ nir_alu_src **limit,
   loop_info_state *state)
  {
 bool limit_rhs = true;
 /* We assume that the limit is the "right" operand */
 *ind = get_loop_var(alu->src[0].src.ssa, state);
-   *limit = get_loop_var(alu->src[1].src.ssa, state);
+   *limit = &alu->src[1];
 if ((*ind)->type != basic_induction) {
    /* We had it the wrong way, flip things around */
    *ind = get_loop_var(alu->src[1].src.ssa, state);
-  *limit = get_loop_var(alu->src[0].src.ssa, state);
+  *limit = &alu->src[0];
    limit_rhs = false;
 }
@@ -799,7 +800,7 @@ get_induction_and_limit_vars(nir_alu_instr *alu, 
nir_loop_variable **ind,

  static void
  try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
   nir_loop_variable **ind,
- nir_loop_variable **limit,
+ nir_alu_src **limit,
   bool *limit_rhs,
   loop_info_state *state)
  {
@@ -848,7 +849,7 @@ try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
 /* Try the other iand src if needed */
 if (*ind == NULL || (*ind && (*ind)->type != basic_induction) ||
-   !is_var_constant(*limit)) {
+   (*limit)->src.ssa->parent_instr->type != 
nir_instr_type_load_const) {

    src = iand->src[1].src.ssa;
    if (src->parent_instr->type == nir_instr_type_alu) {
   nir_alu_instr *tmp_alu = nir_instr_as_alu(src->parent_instr);
@@ -891,7 +892,7 @@ find_trip_count(loop_info_state *state)
    bool limit_rhs;
    nir_loop_variable *basic_ind = NULL;
-  nir_loop_variable *limit;
+  nir_alu_src *limit;
    if (alu->op == nir_op_inot || alu->op == nir_op_ieq) {
   nir_alu_instr *new_alu = alu;
   try_find_trip_count_vars_in_iand(&new_alu, &basic_ind, &limit,
@@ -931,13 +932,13 @@ find_trip_count(loop_info_state *state)
    /* Attempt to find a constant limit for the loop */
    nir_const_value limit_val;
-  if (is_var_constant(limit)) {
+  if (limit->src.ssa->parent_instr->type == 
nir_instr_type_load_const) {

   limit_val =
-    nir_instr_as_load_const(limit->def->parent_instr)->value[0];
+
nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];

    } else {
   trip_count_known = false;
- if (!try_find_limit_of_alu(limit, _val, terminator, 
state)) {

+ if (!try_find_limit_of_alu(limit, &limit_val, terminator)) {
  /* Guess loop limit based on array access */
  if (!guess_loop_limit(state, _val, 

[Mesa-dev] [Bug 110673] amdgpu hevc encoding problems: segment fault and contents of garbage

2019-06-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110673

--- Comment #7 from Boyuan Zhang  ---
(In reply to zhoulei from comment #6)
> (In reply to Boyuan Zhang from comment #5)
> > Hi Zhoulei,
> > 
> > I fixed the second issue you reported (cbr/vbr corruption for hevc 
> > encoding).
> > 
> > Please give a try using this patch:
> > https://lists.freedesktop.org/archives/mesa-dev/2019-June/220304.html
> > 
> > Thanks,
> > Boyuan
> 
> Hi Boyuan:
> 
> I have tested these patches on RX580, it works.
> 
> In addition:
> 
> Changes in radeon_vcn_enc_1_2.c cannot be applied.
> 
> It looks like a typo.
> 
> RENC_UVD_RATE_CONTROL_METHOD_NONE, radeon_uvd_enc_code_fixed_bits and
> 
> radeon_uvd_enc_code_ue are defined in radeon_uvd_enc.h.
> 
> should use macros and functions defined in radeon_vcn_enc.h.


Thanks for your testing Zhoulei.

The typo has been fixed in the patch V2:
https://lists.freedesktop.org/archives/mesa-dev/2019-June/220450.html

Regards,
Boyuan

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110603] Blocky and black opacity/alpha using RADV on some games

2019-06-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110603

--- Comment #6 from tivob...@hostguru.top ---
I experienced a visually similar issue in witcher3 after updating my gentoo
system with a RX 570. Notable changes:
- installed mesa 19.1 (from 19.0.x, not sure which exact version)
- updated wine to 4.10 (from 4.6 iirc)
- migrated the gentoo profile from 17.0 to 17.1 (including all
recommended rebuilds)

Switching back to mesa 19.0.6 does not fix the issue, but setting the
RADV_DEBUG environment variable to "nohiz" does. Maybe that helps to identify
the issue?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeon/uvd: enable rate control for hevc encoding

2019-06-19 Thread boyuan.zhang
From: Boyuan Zhang 

Set cu_qp_delta_enable_flag on when rate control is enabled, and set it
off when rate control is disabled (e.g. constant qp).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110673
Cc: mesa-sta...@lists.freedesktop.org

V2: fix typo and add bugzilla info

Signed-off-by: Boyuan Zhang 
Acked-by: Leo Liu 
---
 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c 
b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
index 8f0e0099e7..9acc33d906 100644
--- a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
@@ -573,7 +573,13 @@ radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder 
*enc)
   enc->enc_pic.hevc_spec_misc.
   constrained_intra_pred_flag, 1);
radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
-   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+   if (enc->enc_pic.rc_session_init.rate_control_method ==
+  RENC_UVD_RATE_CONTROL_METHOD_NONE)
+  radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
+   else {
+  radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
+  radeon_uvd_enc_code_ue(enc, 0x0);
+   }
radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeon/vcn: enable rate control for hevc encoding

2019-06-19 Thread boyuan.zhang
From: Boyuan Zhang 

Set cu_qp_delta_enable_flag on when rate control is enabled, and set it
off when rate control is disabled (e.g. constant qp).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110673
Cc: mesa-sta...@lists.freedesktop.org

V2: fix typo and add bugzilla info

Signed-off-by: Boyuan Zhang 
Acked-by: Leo Liu 
---
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index 3302ed7524..e4b9162034 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -687,7 +687,13 @@ static void radeon_enc_nalu_pps_hevc(struct radeon_encoder 
*enc)
radeon_enc_code_se(enc, 0x0);
radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag, 1);
radeon_enc_code_fixed_bits(enc, 0x0, 1);
-   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   if (enc->enc_pic.rc_session_init.rate_control_method ==
+   RENCODE_RATE_CONTROL_METHOD_NONE)
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   else {
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_code_ue(enc, 0x0);
+   }
radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
radeon_enc_code_fixed_bits(enc, 0x0, 1);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] llvmpipe: make remove_shader_variant static.

2019-06-19 Thread Dave Airlie
From: Dave Airlie 

this isn't used outside this file.
---
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.h | 4 
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index ab285bed1ca..b05997a3aab 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -3023,7 +3023,7 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void 
*fs)
  * Remove shader variant from two lists: the shader's variant list
  * and the context's variant list.
  */
-void
+static void
 llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
struct lp_fragment_shader_variant *variant)
 {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h 
b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 28eccde17f8..dc04df8bd94 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -145,8 +145,4 @@ struct lp_fragment_shader
 void
 lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant);
 
-void
-llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
-   struct lp_fragment_shader_variant *variant);
-
 #endif /* LP_STATE_FS_H_ */
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] android: virgl: fix libmesa_virgil_common build and dependencies

2019-06-19 Thread Clayton Craft

On Sat, Jun 15, 2019 at 07:39:02AM +0200, Mauro Rossi wrote:

Fixes the following building errors and resolves Bug 110922
Fixes gallium_dri target missing symbols at linking.


Tested in CI, android build test completes successfully. Feel free to add my
tested-by. Thanks for the fix!




external/mesa/src/gallium/winsys/virgl/drm/Android.mk:
error: libmesa_winsys_virgl (STATIC_LIBRARIES android-x86_64) missing 
libmesa_winsys_virgl_common (STATIC_LIBRARIES android-x86_64)
...
external/mesa/src/gallium/winsys/virgl/vtest/Android.mk:
error: libmesa_winsys_virgl_vtest (STATIC_LIBRARIES android-x86_64) missing 
libmesa_winsys_virgl_common (STATIC_LIBRARIES android-x86_64)
...
build/core/main.mk:728: error: exiting from previous errors.

In file included from 
external/mesa/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c:34:
external/mesa/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h:35:10:
fatal error: 'virgl_resource_cache.h' file not found
^~~~
1 error generated.

In file included from 
external/mesa/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c:32:
external/mesa/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h:35:10:
fatal error: 'virgl_resource_cache.h' file not found
#include "virgl_resource_cache.h"
^~~~
1 error generated.

Fixes: b18f09a ("virgl: Introduce virgl_resource_cache")
Signed-off-by: Mauro Rossi 
---
src/gallium/Android.mk| 2 +-
src/gallium/drivers/virgl/Android.mk  | 2 +-
src/gallium/winsys/virgl/drm/Android.mk   | 2 ++
src/gallium/winsys/virgl/vtest/Android.mk | 2 ++
4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index 3a3f042c7a..37e923c225 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -43,7 +43,7 @@ SUBDIRS += winsys/radeon/drm drivers/r300
SUBDIRS += winsys/radeon/drm drivers/r600
SUBDIRS += winsys/radeon/drm winsys/amdgpu/drm drivers/radeonsi
SUBDIRS += winsys/vc4/drm drivers/vc4
-SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl
+SUBDIRS += winsys/virgl/common winsys/virgl/drm winsys/virgl/vtest 
drivers/virgl
SUBDIRS += winsys/svga/drm drivers/svga
SUBDIRS += winsys/etnaviv/drm drivers/etnaviv drivers/renderonly
SUBDIRS += state_trackers/dri
diff --git a/src/gallium/drivers/virgl/Android.mk 
b/src/gallium/drivers/virgl/Android.mk
index 0067dfa702..a6fe53fbe9 100644
--- a/src/gallium/drivers/virgl/Android.mk
+++ b/src/gallium/drivers/virgl/Android.mk
@@ -35,5 +35,5 @@ include $(BUILD_STATIC_LIBRARY)

ifneq ($(HAVE_GALLIUM_VIRGL),)
GALLIUM_TARGET_DRIVERS += virtio_gpu
-$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_virgl 
libmesa_winsys_virgl_vtest)
+$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_virgl_common 
libmesa_winsys_virgl libmesa_winsys_virgl_vtest)
endif
diff --git a/src/gallium/winsys/virgl/drm/Android.mk 
b/src/gallium/winsys/virgl/drm/Android.mk
index 5e2500774e..398a7645bc 100644
--- a/src/gallium/winsys/virgl/drm/Android.mk
+++ b/src/gallium/winsys/virgl/drm/Android.mk
@@ -27,6 +27,8 @@ include $(CLEAR_VARS)

LOCAL_SRC_FILES := $(C_SOURCES)

+LOCAL_C_INCLUDES := $(GALLIUM_TOP)/winsys/virgl/common
+
LOCAL_MODULE := libmesa_winsys_virgl

LOCAL_STATIC_LIBRARIES := libmesa_winsys_virgl_common
diff --git a/src/gallium/winsys/virgl/vtest/Android.mk 
b/src/gallium/winsys/virgl/vtest/Android.mk
index 5b33f67711..6d35223c8e 100644
--- a/src/gallium/winsys/virgl/vtest/Android.mk
+++ b/src/gallium/winsys/virgl/vtest/Android.mk
@@ -27,6 +27,8 @@ include $(CLEAR_VARS)

LOCAL_SRC_FILES := $(C_SOURCES)

+LOCAL_C_INCLUDES := $(GALLIUM_TOP)/winsys/virgl/common
+
LOCAL_MODULE := libmesa_winsys_virgl_vtest

LOCAL_STATIC_LIBRARIES := libmesa_winsys_virgl_common
--
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] nir/loop_analyze: used nir_alu_src to track loop limit

2019-06-19 Thread Jason Ekstrand
On Wed, Jun 19, 2019 at 3:09 AM Timothy Arceri 
wrote:

> This helps reduce the amount of abstraction in this pass and allows
> us to retain more information about the src such as any swizzles.
> Retaining the swizzle information is required for a bugfix in the
> following patch.
>
> Fixes: 6772a17acc8e ("nir: Add a loop analysis pass")
> ---
>  src/compiler/nir/nir_loop_analyze.c | 37 +++--
>  1 file changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/src/compiler/nir/nir_loop_analyze.c
> b/src/compiler/nir/nir_loop_analyze.c
> index e85a404da1b..57d2d94cad2 100644
> --- a/src/compiler/nir/nir_loop_analyze.c
> +++ b/src/compiler/nir/nir_loop_analyze.c
> @@ -543,25 +543,26 @@ guess_loop_limit(loop_info_state *state,
> nir_const_value *limit_val,
>  }
>
>  static bool
> -try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value
> *limit_val,
> -  nir_loop_terminator *terminator, loop_info_state
> *state)
> +try_find_limit_of_alu(nir_alu_src *limit, nir_const_value *limit_val,
> +  nir_loop_terminator *terminator)
>  {
> -   if(!is_var_alu(limit))
> +   if(limit->src.ssa->parent_instr->type != nir_instr_type_alu)
>return false;
>
> -   nir_alu_instr *limit_alu = nir_instr_as_alu(limit->def->parent_instr);
> +   nir_alu_instr *limit_alu =
> nir_instr_as_alu(limit->src.ssa->parent_instr);
>
> if (limit_alu->op == nir_op_imin ||
> limit_alu->op == nir_op_fmin) {
> -  limit = get_loop_var(limit_alu->src[0].src.ssa, state);
> +  limit = &limit_alu->src[0];
>
> -  if (!is_var_constant(limit))
> - limit = get_loop_var(limit_alu->src[1].src.ssa, state);
> +  if (limit->src.ssa->parent_instr->type != nir_instr_type_load_const)
> + limit = &limit_alu->src[1];
>

This is still horribly broken w.r.t swizzles because we're not tracking the
component as we make this or the jump above for [if]min.


>
> -  if (!is_var_constant(limit))
> +  if (limit->src.ssa->parent_instr->type != nir_instr_type_load_const)
>   return false;
>
> -  *limit_val =
> nir_instr_as_load_const(limit->def->parent_instr)->value[0];
> +  *limit_val =
> + nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];
>
>terminator->exact_trip_count_unknown = true;
>
> @@ -777,19 +778,19 @@ is_supported_terminator_condition(nir_alu_instr *alu)
>
>  static bool
>  get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
> - nir_loop_variable **limit,
> + nir_alu_src **limit,
>   loop_info_state *state)
>  {
> bool limit_rhs = true;
>
> /* We assume that the limit is the "right" operand */
> *ind = get_loop_var(alu->src[0].src.ssa, state);
> -   *limit = get_loop_var(alu->src[1].src.ssa, state);
> +   *limit = &alu->src[1];
>
> if ((*ind)->type != basic_induction) {
>/* We had it the wrong way, flip things around */
>*ind = get_loop_var(alu->src[1].src.ssa, state);
> -  *limit = get_loop_var(alu->src[0].src.ssa, state);
> +  *limit = &alu->src[0];
>limit_rhs = false;
> }
>
> @@ -799,7 +800,7 @@ get_induction_and_limit_vars(nir_alu_instr *alu,
> nir_loop_variable **ind,
>  static void
>  try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
>   nir_loop_variable **ind,
> - nir_loop_variable **limit,
> + nir_alu_src **limit,
>   bool *limit_rhs,
>   loop_info_state *state)
>  {
> @@ -848,7 +849,7 @@ try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
>
> /* Try the other iand src if needed */
> if (*ind == NULL || (*ind && (*ind)->type != basic_induction) ||
> -   !is_var_constant(*limit)) {
> +   (*limit)->src.ssa->parent_instr->type !=
> nir_instr_type_load_const) {
>src = iand->src[1].src.ssa;
>if (src->parent_instr->type == nir_instr_type_alu) {
>   nir_alu_instr *tmp_alu = nir_instr_as_alu(src->parent_instr);
> @@ -891,7 +892,7 @@ find_trip_count(loop_info_state *state)
>
>bool limit_rhs;
>nir_loop_variable *basic_ind = NULL;
> -  nir_loop_variable *limit;
> +  nir_alu_src *limit;
>if (alu->op == nir_op_inot || alu->op == nir_op_ieq) {
>   nir_alu_instr *new_alu = alu;
>   try_find_trip_count_vars_in_iand(&new_alu, &basic_ind, &limit,
> @@ -931,13 +932,13 @@ find_trip_count(loop_info_state *state)
>
>/* Attempt to find a constant limit for the loop */
>nir_const_value limit_val;
> -  if (is_var_constant(limit)) {
> +  if (limit->src.ssa->parent_instr->type ==
> nir_instr_type_load_const) {
>   limit_val =
> -nir_instr_as_load_const(limit->def->parent_instr)->value[0];
> +
> nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];
>} else {
>   

Re: [Mesa-dev] [PATCH] radeon/vcn: enable rate control for hevc encoding

2019-06-19 Thread Liu, Leo
Please tag the bugzilla link to the commit messages. With that, the set 
of patches are

Acked-by: Leo Liu 


On 2019-06-17 3:07 p.m., boyuan.zh...@amd.com wrote:
> From: Boyuan Zhang 
>
> Set cu_qp_delta_enable_flag on when rate control is enabled, and set it
> off when rate control is disabled (constant qp).
>
> Signed-off-by: Boyuan Zhang 
> ---
>   src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 8 +++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> index 3302ed7524..e4b9162034 100644
> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> @@ -687,7 +687,13 @@ static void radeon_enc_nalu_pps_hevc(struct 
> radeon_encoder *enc)
>   radeon_enc_code_se(enc, 0x0);
>   radeon_enc_code_fixed_bits(enc, 
> enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag, 1);
>   radeon_enc_code_fixed_bits(enc, 0x0, 1);
> - radeon_enc_code_fixed_bits(enc, 0x0, 1);
> + if (enc->enc_pic.rc_session_init.rate_control_method ==
> + RENC_UVD_RATE_CONTROL_METHOD_NONE)
> + radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
> + else {
> + radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
> + radeon_uvd_enc_code_ue(enc, 0x0);
> + }
>   radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
>   radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
>   radeon_enc_code_fixed_bits(enc, 0x0, 1);
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 10/10] panfrost: Remove other commented pointers

2019-06-19 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pandecode/decode.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index ce948f9ce02..5bc6dd84c60 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -2011,10 +2011,6 @@ pandecode_replay_vertex_or_tiler_job_mdg(const struct 
mali_job_descriptor_header
 {
 struct midgard_payload_vertex_tiler *PANDECODE_PTR_VAR(v, mem, 
payload);
 
-char *a = pointer_as_memory_reference(payload);
-pandecode_msg("vt payload: %s\n", a);
-free(a);
-
 pandecode_replay_vertex_tiler_postfix_pre(&v->postfix, job_no, 
h->job_type, "", false);
 
 pandecode_replay_indices(v->prefix.indices, v->prefix.index_count, 
job_no);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 08/10] panfrost/decode: Remove memory comments

2019-06-19 Thread Alyssa Rosenzweig
These do more harm than good at this point.

Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/pandecode/decode.c   | 20 ---
 1 file changed, 20 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index 2c74d807d63..9b04646f43f 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -43,12 +43,6 @@ int pandecode_replay_jc(mali_ptr jc_gpu_va, bool bifrost);
free(a); \
 }
 
-#define MEMORY_COMMENT(obj, p) {\
-   char *a = pointer_as_memory_reference(obj->p); \
-   pandecode_msg("%s = %s\n", #p, a); \
-   free(a); \
-}
-
 #define DYN_MEMORY_PROP(obj, no, p) { \
if (obj->p) \
pandecode_prop("%s = %s_%d_p", #p, #p, no); \
@@ -1776,30 +1770,16 @@ pandecode_replay_vertex_tiler_postfix(const struct 
mali_vertex_tiler_postfix *p,
 pandecode_indent++;
 
 MEMORY_PROP(p, position_varying);
-MEMORY_COMMENT(p, position_varying);
 DYN_MEMORY_PROP(p, job_no, uniform_buffers);
-MEMORY_COMMENT(p, uniform_buffers);
 DYN_MEMORY_PROP(p, job_no, texture_trampoline);
-MEMORY_COMMENT(p, texture_trampoline);
 DYN_MEMORY_PROP(p, job_no, sampler_descriptor);
-MEMORY_COMMENT(p, sampler_descriptor);
 DYN_MEMORY_PROP(p, job_no, uniforms);
-MEMORY_COMMENT(p, uniforms);
 DYN_MEMORY_PROP(p, job_no, attributes);
-MEMORY_COMMENT(p, attributes);
 DYN_MEMORY_PROP(p, job_no, attribute_meta);
-MEMORY_COMMENT(p, attribute_meta);
 DYN_MEMORY_PROP(p, job_no, varyings);
-MEMORY_COMMENT(p, varyings);
 DYN_MEMORY_PROP(p, job_no, varying_meta);
-MEMORY_COMMENT(p, varying_meta);
 DYN_MEMORY_PROP(p, job_no, viewport);
-MEMORY_COMMENT(p, viewport);
 DYN_MEMORY_PROP(p, job_no, occlusion_counter);
-MEMORY_COMMENT(p, occlusion_counter);
-MEMORY_COMMENT(p, framebuffer & ~1);
-pandecode_msg("%" PRIx64 "\n", p->viewport);
-pandecode_msg("%" PRIx64 "\n", p->framebuffer);
 
 if (is_bifrost)
 pandecode_prop("framebuffer = scratchpad_%d_p", job_no);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/10] panfrost/decode: Identify "compute FBD"

2019-06-19 Thread Alyssa Rosenzweig
There is fundamentally not a framebuffer associated with a compute job.
Allocate a new structure for it so we don't mess up graphics when
decoding.

Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/include/panfrost-job.h   |  7 ++
 .../drivers/panfrost/pandecode/decode.c   | 22 +++
 2 files changed, 29 insertions(+)

diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h 
b/src/gallium/drivers/panfrost/include/panfrost-job.h
index 80ce66fa7e2..4b3c788e4e9 100644
--- a/src/gallium/drivers/panfrost/include/panfrost-job.h
+++ b/src/gallium/drivers/panfrost/include/panfrost-job.h
@@ -1406,6 +1406,13 @@ struct mali_single_framebuffer {
 /* More below this, maybe */
 } __attribute__((packed));
 
+/* On Midgard, this "framebuffer descriptor" is used for the framebuffer field
+ * of compute jobs. Superficially resembles a single framebuffer descriptor */
+
+struct mali_compute_fbd {
+u32 unknown[64];
+} __attribute__((packed));
+
 /* Format bits for the render target flags */
 
 #define MALI_MFBD_FORMAT_MSAA(1 << 1)
diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index daa9d4736a6..61e0a0123e4 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -539,6 +539,26 @@ pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
 printf("},\n");
 }
 
+static void
+pandecode_compute_fbd(uint64_t gpu_va, int job_no)
+{
+struct pandecode_mapped_memory *mem = 
pandecode_find_mapped_gpu_mem_containing(gpu_va);
+const struct mali_compute_fbd *PANDECODE_PTR_VAR(s, mem, (mali_ptr) 
gpu_va);
+
+pandecode_log("struct mali_compute_fbd framebuffer_%d = {\n", job_no);
+pandecode_indent++;
+
+pandecode_log(".unknown = {");
+
+for (int i = 0; i < sizeof(s->unknown) / sizeof(s->unknown[0]); ++i)
+printf("%X, ", s->unknown[i]);
+
+pandecode_log("},\n");
+
+pandecode_indent--;
+printf("},\n");
+}
+
 static void
 pandecode_replay_swizzle(unsigned swizzle)
 {
@@ -1275,6 +1295,8 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct 
mali_vertex_tiler_postfix
 pandecode_replay_scratchpad(p->framebuffer & ~FBD_TYPE, 
job_no, suffix);
 else if (p->framebuffer & MALI_MFBD)
 pandecode_replay_mfbd_bfr((u64) ((uintptr_t) p->framebuffer) & 
FBD_MASK, job_no, false);
+else if (job_type == JOB_TYPE_COMPUTE)
+pandecode_compute_fbd((u64) (uintptr_t) p->framebuffer, 
job_no);
 else
 pandecode_replay_sfbd((u64) (uintptr_t) p->framebuffer, 
job_no);
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/10] panfrost: Add missing 0x in invocation_count

2019-06-19 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pandecode/decode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index 76baa06dda2..2c74d807d63 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -1158,7 +1158,7 @@ pandecode_replay_vertex_tiler_prefix(struct 
mali_vertex_tiler_prefix *p, int job
 pandecode_log_cont("{\n");
 pandecode_indent++;
 
-pandecode_prop("invocation_count = %" PRIx32, p->invocation_count);
+pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
 pandecode_prop("size_y_shift = %d", p->size_y_shift);
 pandecode_prop("size_z_shift = %d", p->size_z_shift);
 pandecode_prop("workgroups_x_shift = %d", p->workgroups_x_shift);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 09/10] panfrost/decode: Elide more zero fields

2019-06-19 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/pandecode/decode.c   | 22 ++-
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index 9b04646f43f..ce948f9ce02 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -38,9 +38,11 @@
 int pandecode_replay_jc(mali_ptr jc_gpu_va, bool bifrost);
 
 #define MEMORY_PROP(obj, p) {\
-   char *a = pointer_as_memory_reference(obj->p); \
-   pandecode_prop("%s = %s", #p, a); \
-   free(a); \
+if (obj->p) { \
+char *a = pointer_as_memory_reference(obj->p); \
+pandecode_prop("%s = %s", #p, a); \
+free(a); \
+} \
 }
 
 #define DYN_MEMORY_PROP(obj, no, p) { \
@@ -1177,7 +1179,9 @@ pandecode_replay_vertex_tiler_prefix(struct 
mali_vertex_tiler_prefix *p, int job
  32) + 1);
 
 /* TODO: Decode */
-pandecode_prop("unknown_draw = 0x%" PRIx32, p->unknown_draw);
+if (p->unknown_draw)
+pandecode_prop("unknown_draw = 0x%" PRIx32, p->unknown_draw);
+
 pandecode_prop("workgroups_x_shift_3 = 0x%" PRIx32, 
p->workgroups_x_shift_3);
 
 pandecode_prop("draw_mode = %s", 
pandecode_draw_mode_name(p->draw_mode));
@@ -1187,7 +1191,8 @@ pandecode_replay_vertex_tiler_prefix(struct 
mali_vertex_tiler_prefix *p, int job
 if (p->index_count)
 pandecode_prop("index_count = MALI_POSITIVE(%" PRId32 ")", 
p->index_count + 1);
 
-pandecode_prop("negative_start = %d", p->negative_start);
+if (p->negative_start)
+pandecode_prop("negative_start = %d", p->negative_start);
 
 DYN_MEMORY_PROP(p, job_no, indices);
 
@@ -1897,6 +1902,9 @@ pandecode_replay_gl_enables(uint32_t gl_enables, int 
job_type)
 static void
 pandecode_replay_primitive_size(union midgard_primitive_size u, bool constant)
 {
+if (u.pointer == 0x0)
+return;
+
 pandecode_log(".primitive_size = {\n");
 pandecode_indent++;
 
@@ -2021,7 +2029,9 @@ pandecode_replay_vertex_or_tiler_job_mdg(const struct 
mali_job_descriptor_header
 pandecode_replay_vertex_tiler_prefix(&v->prefix, job_no);
 
 pandecode_replay_gl_enables(v->gl_enables, h->job_type);
-pandecode_prop("draw_start = %d", v->draw_start);
+
+if (v->draw_start)
+pandecode_prop("draw_start = %d", v->draw_start);
 
 #ifndef __LP64__
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 06/10] panfrost/decode: Skip decode of fragment backend in non-fragment

2019-06-19 Thread Alyssa Rosenzweig
This is all zero for anything but fragment shaders.

Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/pandecode/decode.c   | 72 ---
 1 file changed, 48 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index 8adcb5a7d44..76baa06dda2 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -937,9 +937,23 @@ pandecode_replay_shader_address(const char *name, mali_ptr 
ptr)
 return shader_ptr;
 }
 
+static bool
+all_zero(unsigned *buffer, unsigned count)
+{
+for (unsigned i = 0; i < count; ++i) {
+if (buffer[i])
+return false;
+}
+
+return true;
+}
+
 static void
 pandecode_replay_stencil(const char *name, const struct mali_stencil_test 
*stencil)
 {
+if (all_zero((unsigned *) stencil, sizeof(stencil) / sizeof(unsigned)))
+return;
+
 const char *func = pandecode_func_name(stencil->func);
 const char *sfail = pandecode_stencil_op_name(stencil->sfail);
 const char *dpfail = pandecode_stencil_op_name(stencil->dpfail);
@@ -1019,6 +1033,9 @@ pandecode_bifrost_blend(void *descs, int job_no, int 
rt_no)
 static mali_ptr
 pandecode_midgard_blend(union midgard_blend *blend, bool is_shader)
 {
+if (all_zero((unsigned *) blend, sizeof(blend) / sizeof(unsigned)))
+return 0;
+
 pandecode_log(".blend = {\n");
 pandecode_indent++;
 
@@ -1376,37 +1393,43 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct 
mali_vertex_tiler_postfix
 pandecode_prop("depth_factor = %f", s->depth_factor);
 }
 
-bool invert_alpha_coverage = s->alpha_coverage & 0xFFF0;
-uint16_t inverted_coverage = invert_alpha_coverage ? 
~s->alpha_coverage : s->alpha_coverage;
+if (s->alpha_coverage) {
+bool invert_alpha_coverage = s->alpha_coverage & 
0xFFF0;
+uint16_t inverted_coverage = invert_alpha_coverage ? 
~s->alpha_coverage : s->alpha_coverage;
 
-pandecode_prop("alpha_coverage = %sMALI_ALPHA_COVERAGE(%f)",
- invert_alpha_coverage ? "~" : "",
- MALI_GET_ALPHA_COVERAGE(inverted_coverage));
-
-pandecode_log(".unknown2_3 = ");
+pandecode_prop("alpha_coverage = 
%sMALI_ALPHA_COVERAGE(%f)",
+ invert_alpha_coverage ? "~" : "",
+ 
MALI_GET_ALPHA_COVERAGE(inverted_coverage));
+}
 
-int unknown2_3 = s->unknown2_3;
-int unknown2_4 = s->unknown2_4;
+if (s->unknown2_3 || s->unknown2_4) {
+pandecode_log(".unknown2_3 = ");
 
-/* We're not quite sure what these flags mean without the 
depth test, if anything */
+int unknown2_3 = s->unknown2_3;
+int unknown2_4 = s->unknown2_4;
 
-if (unknown2_3 & (MALI_DEPTH_TEST | MALI_DEPTH_FUNC_MASK)) {
-const char *func = 
pandecode_func_name(MALI_GET_DEPTH_FUNC(unknown2_3));
-unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
+/* We're not quite sure what these flags mean without 
the depth test, if anything */
 
-pandecode_log_cont("MALI_DEPTH_FUNC(%s) | ", func);
-}
+if (unknown2_3 & (MALI_DEPTH_TEST | 
MALI_DEPTH_FUNC_MASK)) {
+const char *func = 
pandecode_func_name(MALI_GET_DEPTH_FUNC(unknown2_3));
+unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
 
-pandecode_log_decoded_flags(u3_flag_info, unknown2_3);
-pandecode_log_cont(",\n");
+pandecode_log_cont("MALI_DEPTH_FUNC(%s) | ", 
func);
+}
 
-pandecode_prop("stencil_mask_front = 0x%02X", 
s->stencil_mask_front);
-pandecode_prop("stencil_mask_back = 0x%02X", 
s->stencil_mask_back);
+pandecode_log_decoded_flags(u3_flag_info, unknown2_3);
+pandecode_log_cont(",\n");
 
-pandecode_log(".unknown2_4 = ");
-pandecode_log_decoded_flags(u4_flag_info, unknown2_4);
-pandecode_log_cont(",\n");
+pandecode_log(".unknown2_4 = ");
+pandecode_log_decoded_flags(u4_flag_info, unknown2_4);
+pandecode_log_cont(",\n");
+}
 
+if (s->stencil_mask_front || s->stencil_mask_back) {
+pandecode_prop("stencil_mask_front = 0x%02X", 
s->stencil_mask_front);
+

[Mesa-dev] [PATCH 03/10] panfrost/decode: Show int uniforms

2019-06-19 Thread Alyssa Rosenzweig
Float is ambiguous.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pandecode/decode.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index 3757ee9dd4f..7b8b2c87f7e 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -1529,15 +1529,19 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct 
mali_vertex_tiler_postfix
 
 struct pandecode_mapped_memory *uniform_mem = 
pandecode_find_mapped_gpu_mem_containing(p->uniforms);
 pandecode_fetch_gpu_mem(uniform_mem, p->uniforms, sz);
-float *PANDECODE_PTR_VAR(uniforms, uniform_mem, p->uniforms);
+u32 *PANDECODE_PTR_VAR(uniforms, uniform_mem, p->uniforms);
 
-pandecode_log("float uniforms_%d%s[] = {\n", job_no, suffix);
+pandecode_log("u32 uniforms_%d%s[] = {\n", job_no, suffix);
 
 pandecode_indent++;
 
 for (int row = 0; row < rows; row++) {
-for (int i = 0; i < width; i++)
-pandecode_log_cont("%ff, ", uniforms[i]);
+for (int i = 0; i < width; i++) {
+u32 v = uniforms[i];
+float f;
+memcpy(&f, &v, sizeof(v));
+pandecode_log_cont("%X /* %f */, ", v, f);
+}
 
 pandecode_log_cont("\n");
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 05/10] panfrost/decode: Clip mali_compute_fbd at 64-bytes

2019-06-19 Thread Alyssa Rosenzweig
Looking at internal evidence (later fields including a literal other
compute job inception-style, seeming memory corruption, no clear
function, and the field after this being a pointer to *itself*), it
looks like this is really a much smaller descriptor.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/include/panfrost-job.h | 9 -
 src/gallium/drivers/panfrost/pandecode/decode.c | 9 -
 2 files changed, 18 deletions(-)

diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h 
b/src/gallium/drivers/panfrost/include/panfrost-job.h
index aaf6de72bc6..6da86148cd7 100644
--- a/src/gallium/drivers/panfrost/include/panfrost-job.h
+++ b/src/gallium/drivers/panfrost/include/panfrost-job.h
@@ -1411,15 +1411,6 @@ struct mali_single_framebuffer {
 
 struct mali_compute_fbd {
 u32 unknown1[16];
-mali_ptr unknown2;
-mali_ptr unknown3;
-u32 unknown4[16];
-u32 unknown5[18];
-mali_ptr unknown6;
-u32 unknown7[4];
-mali_ptr unknown8;
-mali_ptr unknown9;
-u32 unknown10[16];
 } __attribute__((packed));
 
 /* Format bits for the render target flags */
diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index c402ff91cbc..8adcb5a7d44 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -563,15 +563,6 @@ pandecode_compute_fbd(uint64_t gpu_va, int job_no)
 pandecode_indent++;
 
 SHORT_SLIDE(1);
-MEMORY_PROP(s, unknown2);
-MEMORY_PROP(s, unknown3);
-SHORT_SLIDE(4);
-SHORT_SLIDE(5);
-MEMORY_PROP(s, unknown6);
-SHORT_SLIDE(7);
-MEMORY_PROP(s, unknown8);
-MEMORY_PROP(s, unknown9);
-SHORT_SLIDE(10);
 
 pandecode_indent--;
 printf("},\n");
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 02/10] panfrost/decode: Expand pointers in compute descriptor

2019-06-19 Thread Alyssa Rosenzweig
Just as an aid.

Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/include/panfrost-job.h   | 11 ++-
 .../drivers/panfrost/pandecode/decode.c   | 30 +++
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h 
b/src/gallium/drivers/panfrost/include/panfrost-job.h
index 4b3c788e4e9..aaf6de72bc6 100644
--- a/src/gallium/drivers/panfrost/include/panfrost-job.h
+++ b/src/gallium/drivers/panfrost/include/panfrost-job.h
@@ -1410,7 +1410,16 @@ struct mali_single_framebuffer {
  * of compute jobs. Superficially resembles a single framebuffer descriptor */
 
 struct mali_compute_fbd {
-u32 unknown[64];
+u32 unknown1[16];
+mali_ptr unknown2;
+mali_ptr unknown3;
+u32 unknown4[16];
+u32 unknown5[18];
+mali_ptr unknown6;
+u32 unknown7[4];
+mali_ptr unknown8;
+mali_ptr unknown9;
+u32 unknown10[16];
 } __attribute__((packed));
 
 /* Format bits for the render target flags */
diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index 61e0a0123e4..3757ee9dd4f 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -539,6 +539,20 @@ pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
 printf("},\n");
 }
 
+static void
+pandecode_u32_slide(unsigned name, const u32 *slide, unsigned count)
+{
+pandecode_log(".unknown%d = {", name);
+
+for (int i = 0; i < count; ++i)
+printf("%X, ", slide[i]);
+
+pandecode_log("},\n");
+}
+
+#define SHORT_SLIDE(num) \
+pandecode_u32_slide(num, s->unknown ## num, ARRAY_SIZE(s->unknown ## 
num))
+
 static void
 pandecode_compute_fbd(uint64_t gpu_va, int job_no)
 {
@@ -548,12 +562,16 @@ pandecode_compute_fbd(uint64_t gpu_va, int job_no)
 pandecode_log("struct mali_compute_fbd framebuffer_%d = {\n", job_no);
 pandecode_indent++;
 
-pandecode_log(".unknown = {");
-
-for (int i = 0; i < sizeof(s->unknown) / sizeof(s->unknown[0]); ++i)
-printf("%X, ", s->unknown[i]);
-
-pandecode_log("},\n");
+SHORT_SLIDE(1);
+MEMORY_PROP(s, unknown2);
+MEMORY_PROP(s, unknown3);
+SHORT_SLIDE(4);
+SHORT_SLIDE(5);
+MEMORY_PROP(s, unknown6);
+SHORT_SLIDE(7);
+MEMORY_PROP(s, unknown8);
+MEMORY_PROP(s, unknown9);
+SHORT_SLIDE(10);
 
 pandecode_indent--;
 printf("},\n");
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/10] panfrost/decode: Print COMPUTE uniforms as pointers

2019-06-19 Thread Alyssa Rosenzweig
In OpenGL, uniforms generally represent fp32 vec4s (at least in highp
mode). In OpenCL, they represent vec2s of 64-bit pointers.

Signed-off-by: Alyssa Rosenzweig 
---
 .../drivers/panfrost/pandecode/decode.c   | 25 ++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c 
b/src/gallium/drivers/panfrost/pandecode/decode.c
index 7b8b2c87f7e..c402ff91cbc 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -1523,7 +1523,9 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct 
mali_vertex_tiler_postfix
 pandecode_replay_attribute_meta(job_no, varying_count, p, 
true, suffix);
 }
 
-if (p->uniforms) {
+bool is_compute = job_type == JOB_TYPE_COMPUTE;
+
+if (p->uniforms && !is_compute) {
 int rows = uniform_count, width = 4;
 size_t sz = rows * width * sizeof(float);
 
@@ -1550,6 +1552,27 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct 
mali_vertex_tiler_postfix
 
 pandecode_indent--;
 pandecode_log("};\n");
+} else if (p->uniforms) {
+int rows = uniform_count * 2;
+size_t sz = rows * sizeof(mali_ptr);
+
+struct pandecode_mapped_memory *uniform_mem = 
pandecode_find_mapped_gpu_mem_containing(p->uniforms);
+pandecode_fetch_gpu_mem(uniform_mem, p->uniforms, sz);
+mali_ptr *PANDECODE_PTR_VAR(uniforms, uniform_mem, 
p->uniforms);
+
+pandecode_log("mali_ptr uniforms_%d%s[] = {\n", job_no, 
suffix);
+
+pandecode_indent++;
+
+for (int row = 0; row < rows; row++) {
+char *a = pointer_as_memory_reference(uniforms[row]);
+pandecode_log("%s,\n", a);
+free(a);
+}
+
+pandecode_indent--;
+pandecode_log("};\n");
+
 }
 
 if (p->uniform_buffers) {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 00/10] panfrost: Decode improvements for compute

2019-06-19 Thread Alyssa Rosenzweig
This series includes a number of assorted improvements to the decode
infrastructure to make compute shaders easier to read.

Alyssa Rosenzweig (10):
  panfrost/decode: Identify "compute FBD"
  panfrost/decode: Expand pointers in compute descriptor
  panfrost/decode: Show int uniforms
  panfrost/decode: Print COMPUTE uniforms as pointers
  panfrost/decode: Clip mali_compute_fbd at 64-bytes
  panfrost/decode: Skip decode of fragment backend in non-fragment
  panfrost: Add missing 0x in invocation_count
  panfrost/decode: Remove memory comments
  panfrost/decode: Elide more zero fields
  panfrost: Remove other commented pointers

 .../drivers/panfrost/include/panfrost-job.h   |   7 +
 .../drivers/panfrost/pandecode/decode.c   | 188 --
 2 files changed, 135 insertions(+), 60 deletions(-)

-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 9/9] panfrost: Enable sRGB

2019-06-19 Thread Alyssa Rosenzweig
> Actually, I think this is the commit that introduced these regressions:
> 
> panfrost: Figure out job requirements in pan_job.c

...Hmm.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 9/9] panfrost: Enable sRGB

2019-06-19 Thread Tomeu Vizoso
On Wed, 19 Jun 2019 at 07:44, Tomeu Vizoso  wrote:
>
> On Tue, 18 Jun 2019 at 17:00, Alyssa Rosenzweig
>  wrote:
> >
> > Now that sRGB formats are supported for both rendering and sampling,
> > advertise support.
> >
> > Signed-off-by: Alyssa Rosenzweig 
>
> Hi there,
>
> this patch seems to have caused the following regressions:
>
> dEQP-GLES2.functional.fbo.render.recreate_colorbuffer.no_rebind_rbo_rgb565
> dEQP-GLES2.functional.fbo.render.resize.tex2d_rgb
>
> For more details, see 
> https://gitlab.freedesktop.org/tomeu/mesa/pipelines/43396
>
> Should we revert it for now?

Actually, I think this is the commit that introduced these regressions:

panfrost: Figure out job requirements in pan_job.c

Cheers,

Tomeu

> Cheers,
>
> Tomeu
>
> > ---
> >  src/gallium/drivers/panfrost/pan_screen.c | 4 
> >  1 file changed, 4 deletions(-)
> >
> > diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
> > b/src/gallium/drivers/panfrost/pan_screen.c
> > index 9cd98cd9051..70bff565930 100644
> > --- a/src/gallium/drivers/panfrost/pan_screen.c
> > +++ b/src/gallium/drivers/panfrost/pan_screen.c
> > @@ -450,10 +450,6 @@ panfrost_is_format_supported( struct pipe_screen 
> > *screen,
> >  if (sample_count > 1)
> >  return FALSE;
> >
> > -   /* sRGB colorspace is not supported (yet?) */
> > -   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
> > -   return FALSE;
> > -
> >  /* Format wishlist */
> >  if (format == PIPE_FORMAT_Z24X8_UNORM || format == 
> > PIPE_FORMAT_X8Z24_UNORM)
> >  return FALSE;
> > --
> > 2.20.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] nir/loop_analyze: used nir_alu_src to track loop limit

2019-06-19 Thread Brian Paul

On 06/19/2019 02:08 AM, Timothy Arceri wrote:

This helps reduce the amount of abstraction in this pass and allows
us to retain more information about the src such as any swizzles.
Retaining the swizzle information is required for a bugfix in the
following patch.

Fixes: 6772a17acc8e ("nir: Add a loop analysis pass")
---
  src/compiler/nir/nir_loop_analyze.c | 37 +++--
  1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index e85a404da1b..57d2d94cad2 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -543,25 +543,26 @@ guess_loop_limit(loop_info_state *state, nir_const_value 
*limit_val,
  }
  
  static bool

-try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value *limit_val,
-  nir_loop_terminator *terminator, loop_info_state *state)
+try_find_limit_of_alu(nir_alu_src *limit, nir_const_value *limit_val,
+  nir_loop_terminator *terminator)
  {
-   if(!is_var_alu(limit))
+   if(limit->src.ssa->parent_instr->type != nir_instr_type_alu)
return false;
  
-   nir_alu_instr *limit_alu = nir_instr_as_alu(limit->def->parent_instr);

+   nir_alu_instr *limit_alu = nir_instr_as_alu(limit->src.ssa->parent_instr);
  
 if (limit_alu->op == nir_op_imin ||

 limit_alu->op == nir_op_fmin) {
-  limit = get_loop_var(limit_alu->src[0].src.ssa, state);
+  limit = &limit_alu->src[0];
  
-  if (!is_var_constant(limit))

- limit = get_loop_var(limit_alu->src[1].src.ssa, state);
+  if (limit->src.ssa->parent_instr->type != nir_instr_type_load_const)
+ limit = &limit_alu->src[1];
  
-  if (!is_var_constant(limit))

+  if (limit->src.ssa->parent_instr->type != nir_instr_type_load_const)
   return false;
  
-  *limit_val = nir_instr_as_load_const(limit->def->parent_instr)->value[0];

+  *limit_val =
+ nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];
  
terminator->exact_trip_count_unknown = true;
  
@@ -777,19 +778,19 @@ is_supported_terminator_condition(nir_alu_instr *alu)
  
  static bool

  get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
- nir_loop_variable **limit,
+ nir_alu_src **limit,
   loop_info_state *state)
  {
 bool limit_rhs = true;
  
 /* We assume that the limit is the "right" operand */

 *ind = get_loop_var(alu->src[0].src.ssa, state);
-   *limit = get_loop_var(alu->src[1].src.ssa, state);
+   *limit = &alu->src[1];
  
 if ((*ind)->type != basic_induction) {

/* We had it the wrong way, flip things around */
*ind = get_loop_var(alu->src[1].src.ssa, state);
-  *limit = get_loop_var(alu->src[0].src.ssa, state);
+  *limit = &alu->src[0];
limit_rhs = false;
 }
  
@@ -799,7 +800,7 @@ get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,

  static void
  try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
   nir_loop_variable **ind,
- nir_loop_variable **limit,
+ nir_alu_src **limit,
   bool *limit_rhs,
   loop_info_state *state)
  {
@@ -848,7 +849,7 @@ try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
  
 /* Try the other iand src if needed */

 if (*ind == NULL || (*ind && (*ind)->type != basic_induction) ||
-   !is_var_constant(*limit)) {
+   (*limit)->src.ssa->parent_instr->type != nir_instr_type_load_const) {
src = iand->src[1].src.ssa;
if (src->parent_instr->type == nir_instr_type_alu) {
   nir_alu_instr *tmp_alu = nir_instr_as_alu(src->parent_instr);
@@ -891,7 +892,7 @@ find_trip_count(loop_info_state *state)
  
bool limit_rhs;

nir_loop_variable *basic_ind = NULL;
-  nir_loop_variable *limit;
+  nir_alu_src *limit;
if (alu->op == nir_op_inot || alu->op == nir_op_ieq) {
   nir_alu_instr *new_alu = alu;
   try_find_trip_count_vars_in_iand(&new_alu, &basic_ind, &limit,
@@ -931,13 +932,13 @@ find_trip_count(loop_info_state *state)
  
/* Attempt to find a constant limit for the loop */

nir_const_value limit_val;
-  if (is_var_constant(limit)) {
+  if (limit->src.ssa->parent_instr->type == nir_instr_type_load_const) {
   limit_val =
-nir_instr_as_load_const(limit->def->parent_instr)->value[0];
+nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];
} else {
   trip_count_known = false;
  
- if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) {

+ if (!try_find_limit_of_alu(limit, &limit_val, terminator)) {
  /* Guess loop limit based on array access */
  if (!guess_loop_limit(state, &limit_val, 

[Mesa-dev] [PATCH] nir/lower_tex: Add an assert() in nir_lower_txs_lod()

2019-06-19 Thread Boris Brezillon
We don't expect the output of a TXS instruction to be wider than a
vec3. Add an assert() to make sure this never happens.

Suggested-by: Jason Ekstrand 
Signed-off-by: Boris Brezillon 
---
 src/compiler/nir/nir_lower_tex.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 8a36edfbc5e4..05b46b902521 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -1011,6 +1011,7 @@ nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
if (tex->is_array) {
   nir_ssa_def *comp[3];
 
+  assert(dest_size <= ARRAY_SIZE(comp));
   for (unsigned i = 0; i < dest_size - 1; i++)
  comp[i] = nir_channel(b, minified, i);
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/5] nir/lower_tex: Add a way to lower TXS(non-0-LOD) instructions

2019-06-19 Thread Boris Brezillon
Hi Jason,

On Tue, 18 Jun 2019 09:45:56 -0500
Jason Ekstrand  wrote:

> On Tue, Jun 18, 2019 at 2:38 AM Boris Brezillon <
> boris.brezil...@collabora.com> wrote:  
> 
> > The V3D driver has an open-coded solution for this, and we need the
> > same thing for Panfrost, so let's add a generic way to lower TXS(LOD)
> > into max(TXS(0) >> LOD, 1).
> >
> > Signed-off-by: Boris Brezillon 
> > ---
> > Changes in v2:
> > * Use == 0 instead of !
> > * Rework the minification logic as suggested by Jason
> > * Assign cursor pos at the beginning of the function
> > * Patch the LOD just after retrieving the old value
> > ---
> >  src/compiler/nir/nir.h   |  6 +
> >  src/compiler/nir/nir_lower_tex.c | 46 
> >  2 files changed, 52 insertions(+)
> >
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 4270df565111..8972b4af7480 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -3426,6 +3426,12 @@ typedef struct nir_lower_tex_options {
> >  */
> > bool lower_txd_clamp_if_sampler_index_not_lt_16;
> >
> > +   /**
> > +* If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs
> > with
> > +* 0-lod followed by a nir_ishr.
> > +*/
> > +   bool lower_txs_lod;
> > +
> > /**
> >  * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
> >  * mixed-up tg4 locations.
> > diff --git a/src/compiler/nir/nir_lower_tex.c
> > b/src/compiler/nir/nir_lower_tex.c
> > index 53719017a87f..6f82ca5f06db 100644
> > --- a/src/compiler/nir/nir_lower_tex.c
> > +++ b/src/compiler/nir/nir_lower_tex.c
> > @@ -978,6 +978,47 @@ lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
> > return true;
> >  }
> >
> > +static bool
> > +nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
> > +{
> > +   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
> > +   if (lod_idx < 0 ||
> > +   (nir_src_is_const(tex->src[lod_idx].src) &&
> > +nir_src_as_int(tex->src[lod_idx].src) == 0))
> > +  return false;
> > +
> > +   unsigned dest_size = nir_tex_instr_dest_size(tex);
> > +
> > +   b->cursor = nir_before_instr(&tex->instr);
> > +   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);
> > +
> > +   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
> > +   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
> > + nir_src_for_ssa(nir_imm_int(b, 0)));
> > +
> > +   /* TXS(LOD) = max(TXS(0) >> LOD, 1) */
> > +   b->cursor = nir_after_instr(&tex->instr);
> > +   nir_ssa_def *minified = nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
> > +nir_imm_int(b, 1));
> > +
> > +   /* Make sure the component encoding the array size (if any) is not
> > +* minified.
> > +*/
> > +   if (tex->is_array) {
> > +  nir_ssa_def *comp[3];
> >  
> 
> Mind throwing in a quick assert?
> 
> assert(dest_size <= ARRAY_SIZE(comp));
> 
> With that added,
> 
> Reviewed-by: Jason Ekstrand 

The patch has been merged already, but I'll add that in a follow-up
patch.

Thanks,

Boris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110939] src/egl/main/egldisplay.c: In function '_eglGetNativePlatform': error: '_EGL_PLATFORM_' undeclared

2019-06-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110939

--- Comment #12 from Eric Engestrom  ---
FWIW the empty string thing is a known issue that will be fixed once we bump our
requirement to meson 0.47; the MR has been around for a while:
https://gitlab.freedesktop.org/mesa/mesa/merge_requests/386

But yeah, until we can do that it's not a bad idea to add extra checks for
these empty values like the one Tapani suggested 

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/6] radv: initialize levels without DCC during layout transitions

2019-06-19 Thread Samuel Pitoiset


On 6/19/19 11:03 AM, Bas Nieuwenhuizen wrote:

Actually, retract r-b, please fix the legacy accesses on gfx9+ too.

Yes, I will fix and send v2.


On Wed, Jun 19, 2019, 11:02 AM Bas Nieuwenhuizen 
mailto:b...@basnieuwenhuizen.nl>> wrote:


R-b

On Tue, Jun 18, 2019, 4:12 PM Samuel Pitoiset
mailto:samuel.pitoi...@gmail.com>> wrote:

Signed-off-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>
---
 src/amd/vulkan/radv_cmd_buffer.c | 40
+++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c
b/src/amd/vulkan/radv_cmd_buffer.c
index a26bf6c6a67..ebeee2c3723 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -4912,11 +4912,49 @@ void radv_initialize_dcc(struct
radv_cmd_buffer *cmd_buffer,
                         const VkImageSubresourceRange *range,
uint32_t value)
 {
        struct radv_cmd_state *state = _buffer->state;
+       uint32_t level_count = radv_get_levelCount(image, range);
+       unsigned size = 0;

        state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;

-       state->flush_bits |= radv_clear_dcc(cmd_buffer, image,
range, value);
+       /* Initialize the mipmap levels with DCC first. */
+       for (unsigned l = 0; l < level_count; l++) {
+               uint32_t level = range->baseMipLevel + l;
+               struct legacy_surf_level *surf_level =
+  >planes[0].surface.u.legacy.level[level];
+
+               if (!surf_level->dcc_fast_clear_size)
+                       break;
+
+               state->flush_bits |=
radv_dcc_clear_level(cmd_buffer, image,
+    level, value);
+       }
+
+       /* When DCC is enabled with mipmaps, some levels might
not support fast
+        * clears and we have to initialize them as "fully
expanded".
+        */
+       if (image->planes[0].surface.num_dcc_levels > 1) {
+               /* Compute the size of all fast clearable DCC
levels. */
+               for (unsigned i = 0; i <
image->planes[0].surface.num_dcc_levels; i++) {
+                       struct legacy_surf_level *surf_level =
+  >planes[0].surface.u.legacy.level[i];
+
+                       if (!surf_level->dcc_fast_clear_size)
+                               break;
+
+                       size = surf_level->dcc_offset +
surf_level->dcc_fast_clear_size;
+               }
+
+               /* Initialize the mipmap levels without DCC. */
+               if (size != image->planes[0].surface.dcc_size) {
+                       state->flush_bits |=
+  radv_fill_buffer(cmd_buffer, image->bo,
+ image->offset + image->dcc_offset + size,
+ image->planes[0].surface.dcc_size - size,
+ 0x);
+               }
+       }

        state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
-- 
2.22.0


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: disable viewport clamping even if FS doesn't write Z

2019-06-19 Thread Bas Nieuwenhuizen
R-b

On Tue, Jun 18, 2019, 6:55 PM Samuel Pitoiset 
wrote:

> This fixes new CTS dEQP-VK.pipeline.depth_range_unrestricted.*.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_pipeline.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c
> b/src/amd/vulkan/radv_pipeline.c
> index 8bc0d9b53e6..765f6105f7d 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -2788,8 +2788,7 @@ radv_pipeline_generate_depth_stencil_state(struct
> radeon_cmdbuf *ctx_cs,
> db_render_override |=
> S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
>
> S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
>
> -   if (!pCreateInfo->pRasterizationState->depthClampEnable &&
> -   ps->info.info.ps.writes_z) {
> +   if (!pCreateInfo->pRasterizationState->depthClampEnable) {
> /* From VK_EXT_depth_range_unrestricted spec:
>  *
>  * "The behavior described in Primitive Clipping still
> applies.
> --
> 2.22.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/6] radv: initialize levels without DCC during layout transitions

2019-06-19 Thread Bas Nieuwenhuizen
R-b

On Tue, Jun 18, 2019, 4:12 PM Samuel Pitoiset 
wrote:

> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 40 +++-
>  1 file changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
> b/src/amd/vulkan/radv_cmd_buffer.c
> index a26bf6c6a67..ebeee2c3723 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4912,11 +4912,49 @@ void radv_initialize_dcc(struct radv_cmd_buffer
> *cmd_buffer,
>  const VkImageSubresourceRange *range, uint32_t
> value)
>  {
> struct radv_cmd_state *state = _buffer->state;
> +   uint32_t level_count = radv_get_levelCount(image, range);
> +   unsigned size = 0;
>
> state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
>  RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
>
> -   state->flush_bits |= radv_clear_dcc(cmd_buffer, image, range,
> value);
> +   /* Initialize the mipmap levels with DCC first. */
> +   for (unsigned l = 0; l < level_count; l++) {
> +   uint32_t level = range->baseMipLevel + l;
> +   struct legacy_surf_level *surf_level =
> +   >planes[0].surface.u.legacy.level[level];
> +
> +   if (!surf_level->dcc_fast_clear_size)
> +   break;
> +
> +   state->flush_bits |= radv_dcc_clear_level(cmd_buffer,
> image,
> + level, value);
> +   }
> +
> +   /* When DCC is enabled with mipmaps, some levels might not support
> fast
> +* clears and we have to initialize them as "fully expanded".
> +*/
> +   if (image->planes[0].surface.num_dcc_levels > 1) {
> +   /* Compute the size of all fast clearable DCC levels. */
> +   for (unsigned i = 0; i <
> image->planes[0].surface.num_dcc_levels; i++) {
> +   struct legacy_surf_level *surf_level =
> +
>  >planes[0].surface.u.legacy.level[i];
> +
> +   if (!surf_level->dcc_fast_clear_size)
> +   break;
> +
> +   size = surf_level->dcc_offset +
> surf_level->dcc_fast_clear_size;
> +   }
> +
> +   /* Initialize the mipmap levels without DCC. */
> +   if (size != image->planes[0].surface.dcc_size) {
> +   state->flush_bits |=
> +   radv_fill_buffer(cmd_buffer, image->bo,
> +image->offset +
> image->dcc_offset + size,
> +
> image->planes[0].surface.dcc_size - size,
> +0x);
> +   }
> +   }
>
> state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
>  RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
> --
> 2.22.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/6] radv: initialize levels without DCC during layout transitions

2019-06-19 Thread Bas Nieuwenhuizen
Actually, retract r-b, please fix the legacy accesses on gfx9+ too.

On Wed, Jun 19, 2019, 11:02 AM Bas Nieuwenhuizen 
wrote:

> R-b
>
> On Tue, Jun 18, 2019, 4:12 PM Samuel Pitoiset 
> wrote:
>
>> Signed-off-by: Samuel Pitoiset 
>> ---
>>  src/amd/vulkan/radv_cmd_buffer.c | 40 +++-
>>  1 file changed, 39 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
>> b/src/amd/vulkan/radv_cmd_buffer.c
>> index a26bf6c6a67..ebeee2c3723 100644
>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> @@ -4912,11 +4912,49 @@ void radv_initialize_dcc(struct radv_cmd_buffer
>> *cmd_buffer,
>>  const VkImageSubresourceRange *range, uint32_t
>> value)
>>  {
>> struct radv_cmd_state *state = _buffer->state;
>> +   uint32_t level_count = radv_get_levelCount(image, range);
>> +   unsigned size = 0;
>>
>> state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
>>  RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
>>
>> -   state->flush_bits |= radv_clear_dcc(cmd_buffer, image, range,
>> value);
>> +   /* Initialize the mipmap levels with DCC first. */
>> +   for (unsigned l = 0; l < level_count; l++) {
>> +   uint32_t level = range->baseMipLevel + l;
>> +   struct legacy_surf_level *surf_level =
>> +   >planes[0].surface.u.legacy.level[level];
>> +
>> +   if (!surf_level->dcc_fast_clear_size)
>> +   break;
>> +
>> +   state->flush_bits |= radv_dcc_clear_level(cmd_buffer,
>> image,
>> + level, value);
>> +   }
>> +
>> +   /* When DCC is enabled with mipmaps, some levels might not
>> support fast
>> +* clears and we have to initialize them as "fully expanded".
>> +*/
>> +   if (image->planes[0].surface.num_dcc_levels > 1) {
>> +   /* Compute the size of all fast clearable DCC levels. */
>> +   for (unsigned i = 0; i <
>> image->planes[0].surface.num_dcc_levels; i++) {
>> +   struct legacy_surf_level *surf_level =
>> +
>>  >planes[0].surface.u.legacy.level[i];
>> +
>> +   if (!surf_level->dcc_fast_clear_size)
>> +   break;
>> +
>> +   size = surf_level->dcc_offset +
>> surf_level->dcc_fast_clear_size;
>> +   }
>> +
>> +   /* Initialize the mipmap levels without DCC. */
>> +   if (size != image->planes[0].surface.dcc_size) {
>> +   state->flush_bits |=
>> +   radv_fill_buffer(cmd_buffer, image->bo,
>> +image->offset +
>> image->dcc_offset + size,
>> +
>> image->planes[0].surface.dcc_size - size,
>> +0x);
>> +   }
>> +   }
>>
>> state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
>>  RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
>> --
>> 2.22.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/6] gallium/util: Make u_debug_flush support persistent maps

2019-06-19 Thread VMware
From: Thomas Hellstrom 

Previously, unsynchronized maps were assumed to also be persistent.
Now distinguish between persistent and unsynchronized maps and also support
PIPE_TRANSFER_PERSISTENT from ARB_buffer_storage.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
 src/gallium/auxiliary/util/u_debug_flush.c | 91 +++---
 src/gallium/auxiliary/util/u_debug_flush.h |  4 +-
 2 files changed, 66 insertions(+), 29 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_debug_flush.c 
b/src/gallium/auxiliary/util/u_debug_flush.c
index c49af095ed1..c0be6681800 100644
--- a/src/gallium/auxiliary/util/u_debug_flush.c
+++ b/src/gallium/auxiliary/util/u_debug_flush.c
@@ -50,17 +50,26 @@
 #include "os/os_thread.h"
 #include 
 
+/* Future improvement: Use realloc instead? */
+#define DEBUG_FLUSH_MAP_DEPTH 16
+
+struct debug_map_item {
+   struct debug_stack_frame *frame;
+   boolean persistent;
+};
+
 struct debug_flush_buf {
/* Atomic */
struct pipe_reference reference; /* Must be the first member. */
mtx_t mutex;
/* Immutable */
-   boolean supports_unsync;
+   boolean supports_persistent;
unsigned bt_depth;
/* Protected by mutex */
-   boolean mapped;
-   boolean mapped_sync;
-   struct debug_stack_frame *map_frame;
+   int map_count;
+   boolean has_sync_map;
+   int last_sync_map;
+   struct debug_map_item maps[DEBUG_FLUSH_MAP_DEPTH];
 };
 
 struct debug_flush_item {
@@ -106,14 +115,14 @@ debug_flush_pointer_hash(void *key)
 }
 
 struct debug_flush_buf *
-debug_flush_buf_create(boolean supports_unsync, unsigned bt_depth)
+debug_flush_buf_create(boolean supports_persistent, unsigned bt_depth)
 {
struct debug_flush_buf *fbuf = CALLOC_STRUCT(debug_flush_buf);
 
if (!fbuf)
   goto out_no_buf;
 
-   fbuf->supports_unsync = supports_unsync;
+   fbuf->supports_persistent = supports_persistent;
fbuf->bt_depth = bt_depth;
pipe_reference_init(>reference, 1);
(void) mtx_init(>mutex, mtx_plain);
@@ -132,8 +141,11 @@ debug_flush_buf_reference(struct debug_flush_buf **dst,
struct debug_flush_buf *fbuf = *dst;
 
if (pipe_reference(&(*dst)->reference, >reference)) {
-  FREE(fbuf->map_frame);
+  int i;
 
+  for (i = 0; i < fbuf->map_count; ++i) {
+ FREE(fbuf->maps[i].frame);
+  }
   FREE(fbuf);
}
 
@@ -211,26 +223,41 @@ debug_flush_alert(const char *s, const char *op,
 void
 debug_flush_map(struct debug_flush_buf *fbuf, unsigned flags)
 {
-   boolean mapped_sync = FALSE;
+   boolean map_sync, persistent;
 
if (!fbuf)
   return;
 
mtx_lock(>mutex);
-   if (fbuf->mapped) {
-  debug_flush_alert("Recursive map detected.", "Map",
+   map_sync = !(flags & PIPE_TRANSFER_UNSYNCHRONIZED);
+   persistent = !map_sync || fbuf->supports_persistent ||
+  !!(flags & PIPE_TRANSFER_PERSISTENT);
+
+   /* Recursive maps are allowed if previous maps are persistent,
+* or if the current map is unsync. In other cases we might flush
+* with unpersistent maps.
+*/
+   if (fbuf->has_sync_map && !map_sync) {
+  debug_flush_alert("Recursive sync map detected.", "Map",
 2, fbuf->bt_depth, TRUE, TRUE, NULL);
   debug_flush_alert(NULL, "Previous map", 0, fbuf->bt_depth, FALSE,
-FALSE, fbuf->map_frame);
-   } else if (!(flags & PIPE_TRANSFER_UNSYNCHRONIZED) ||
-  !fbuf->supports_unsync) {
-  fbuf->mapped_sync = mapped_sync = TRUE;
+FALSE, fbuf->maps[fbuf->last_sync_map].frame);
+   }
+
+   fbuf->maps[fbuf->map_count].frame =
+  debug_flush_capture_frame(1, fbuf->bt_depth);
+   fbuf->maps[fbuf->map_count].persistent = persistent;
+   if (!persistent) {
+  fbuf->has_sync_map = TRUE;
+  fbuf->last_sync_map = fbuf->map_count;
}
-   fbuf->map_frame = debug_flush_capture_frame(1, fbuf->bt_depth);
-   fbuf->mapped = TRUE;
+
+   fbuf->map_count++;
+   assert(fbuf->map_count < DEBUG_FLUSH_MAP_DEPTH);
+
mtx_unlock(>mutex);
 
-   if (mapped_sync) {
+   if (!persistent) {
   struct debug_flush_ctx *fctx;
 
   mtx_lock(_mutex);
@@ -256,14 +283,24 @@ debug_flush_unmap(struct debug_flush_buf *fbuf)
   return;
 
mtx_lock(>mutex);
-   if (!fbuf->mapped)
+   if (--fbuf->map_count < 0) {
   debug_flush_alert("Unmap not previously mapped detected.", "Map",
 2, fbuf->bt_depth, FALSE, TRUE, NULL);
-
-   fbuf->mapped_sync = FALSE;
-   fbuf->mapped = FALSE;
-   FREE(fbuf->map_frame);
-   fbuf->map_frame = NULL;
+   } else {
+  if (fbuf->has_sync_map && fbuf->last_sync_map == fbuf->map_count) {
+ int i = fbuf->map_count;
+
+ fbuf->has_sync_map = FALSE;
+ while (i-- && !fbuf->has_sync_map) {
+if (!fbuf->maps[i].persistent) {
+   fbuf->has_sync_map = TRUE;
+   fbuf->last_sync_map = i;
+}
+ }
+ FREE(fbuf->maps[fbuf->map_count].frame);
+ fbuf->maps[fbuf->map_count].frame = NULL;
+  

[Mesa-dev] [PATCH 4/6] svga: Map vertex-, index- and constant buffers asynchronously when reading

2019-06-19 Thread VMware
From: Thomas Hellstrom 

With SWTNL and index translation we're mapping buffers for reading. These
buffers are commonly upload_mgr buffers that might already be referenced
by another submitted or unsubmitted GPU command. A synchronous map will
then trigger a flush and sync, at least on Linux that doesn't distinguish
between read- and write referencing. So map these buffers async. If they
for some obscure reason happen to be dirty (stream-output, buffer-copy),
the resource_buffer code will read-back and sync anyway. For persistent /
coherent buffers a corresponding read-back and sync will happen in the
kernel fault handler.

Testing: Piglit quick. No regressions.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
 src/gallium/drivers/svga/svga_draw_elements.c | 4 +++-
 src/gallium/drivers/svga/svga_swtnl_draw.c| 9 ++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_draw_elements.c 
b/src/gallium/drivers/svga/svga_draw_elements.c
index b955b2f77e2..41cd4d18993 100644
--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -120,7 +120,9 @@ translate_indices(struct svga_hwtnl *hwtnl,
  goto fail;
 
   *out_offset = 0;
-  src_map = pipe_buffer_map(pipe, info->index.resource, PIPE_TRANSFER_READ,
+  src_map = pipe_buffer_map(pipe, info->index.resource,
+PIPE_TRANSFER_READ |
+PIPE_TRANSFER_UNSYNCHRONIZED,
 _transfer);
   if (!src_map)
  goto fail;
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c 
b/src/gallium/drivers/svga/svga_swtnl_draw.c
index a7db73e02ee..1aa15d8cd26 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -73,7 +73,8 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
   if (svga->curr.vb[i].buffer.resource) {
  map = pipe_buffer_map(>pipe,
svga->curr.vb[i].buffer.resource,
-   PIPE_TRANSFER_READ,
+   PIPE_TRANSFER_READ |
+   PIPE_TRANSFER_UNSYNCHRONIZED,
_transfer[i]);
 
  draw_set_mapped_vertex_buffer(draw, i, map, ~0);
@@ -88,7 +89,8 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
  map = (ubyte *) info->index.user;
   } else {
  map = pipe_buffer_map(>pipe, info->index.resource,
-   PIPE_TRANSFER_READ, _transfer);
+   PIPE_TRANSFER_READ |
+   PIPE_TRANSFER_UNSYNCHRONIZED, _transfer);
   }
   draw_set_indexes(draw,
(const ubyte *) map,
@@ -103,7 +105,8 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
 
   map = pipe_buffer_map(>pipe,
 svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer,
-PIPE_TRANSFER_READ,
+PIPE_TRANSFER_READ |
+PIPE_TRANSFER_UNSYNCHRONIZED,
 _transfer[i]);
   assert(map);
   draw_set_mapped_constant_buffer(
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/6] svga: Fix index buffer uploads

2019-06-19 Thread VMware
From: Thomas Hellstrom 

In the case of SWTNL and index translation we were uploading index buffers
and then reading out from them using the CPU. Furthermore, when translating
indices we often cached the results with an upload_mgr buffer, causing the
cached indexes to be immediately discarded on the next write to that
upload_mgr buffer.

Fix this by only uploading when we know the index buffer is going to be
used by hardware. If translating, only cache translated indices if the
original buffer was not a user buffer. In the latter case when we're not
caching, use an upload_mgr buffer for the hardware indices.

This means we can also remove the SWTNL hand-crafted index buffer upload
mechanism in favour of the upload_mgr.

Finally avoid using util_upload_index_buffer(). It wastes index buffer
space by trying to make sure that the offset of the indices in the
upload_mgr buffer is larger or equal to the position of the indices in
the source buffer. From what I can tell, the SVGA device does not
require that.

Testing done: Piglit quick. No regressions.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
 src/gallium/drivers/svga/svga_draw.h  |  10 +-
 src/gallium/drivers/svga/svga_draw_elements.c | 168 +++---
 src/gallium/drivers/svga/svga_pipe_draw.c |  58 +-
 src/gallium/drivers/svga/svga_swtnl.h |   4 +-
 src/gallium/drivers/svga/svga_swtnl_backend.c |  58 ++
 src/gallium/drivers/svga/svga_swtnl_draw.c|  16 +-
 6 files changed, 142 insertions(+), 172 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_draw.h 
b/src/gallium/drivers/svga/svga_draw.h
index baefcd94ec8..9d79676d3f9 100644
--- a/src/gallium/drivers/svga/svga_draw.h
+++ b/src/gallium/drivers/svga/svga_draw.h
@@ -64,13 +64,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
 
 enum pipe_error
 svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
-   struct pipe_resource *indexBuffer,
-   unsigned index_size,
-   int index_bias,
-   unsigned min_index,
-   unsigned max_index,
-   enum pipe_prim_type prim, unsigned start, 
unsigned count,
-   unsigned start_instance, unsigned 
instance_count);
+   const struct pipe_draw_info *info,
+   unsigned count);
 
 boolean
 svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
@@ -80,5 +75,4 @@ enum pipe_error svga_hwtnl_flush(struct svga_hwtnl *hwtnl);
 
 void svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias);
 
-
 #endif /* SVGA_DRAW_H_ */
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c 
b/src/gallium/drivers/svga/svga_draw_elements.c
index b1db8710740..b955b2f77e2 100644
--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -59,33 +59,41 @@
  * \return error code to indicate success failure
  */
 static enum pipe_error
-translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src,
-  unsigned offset,
-  enum pipe_prim_type orig_prim, enum pipe_prim_type gen_prim,
+translate_indices(struct svga_hwtnl *hwtnl,
+  const struct pipe_draw_info *info,
+  enum pipe_prim_type gen_prim,
   unsigned orig_nr, unsigned gen_nr,
-  unsigned index_size,
-  u_translate_func translate, struct pipe_resource **out_buf)
+  unsigned gen_size,
+  u_translate_func translate,
+  struct pipe_resource **out_buf,
+  unsigned *out_offset)
 {
struct pipe_context *pipe = >svga->pipe;
struct svga_screen *screen = svga_screen(pipe->screen);
-   struct svga_buffer *src_sbuf = svga_buffer(src);
+   struct svga_buffer *src_sbuf = NULL;
struct pipe_transfer *src_transfer = NULL;
struct pipe_transfer *dst_transfer = NULL;
-   unsigned size = index_size * gen_nr;
+   const unsigned size = gen_size * gen_nr;
+   const unsigned offset = info->start * info->index_size;
const void *src_map = NULL;
struct pipe_resource *dst = NULL;
void *dst_map = NULL;
 
-   assert(index_size == 2 || index_size == 4);
+   assert(gen_size == 2 || gen_size == 4);
+   if (!info->has_user_indices)
+  src_sbuf = svga_buffer(info->index.resource);
 
-   if (!screen->debug.no_cache_index_buffers) {
+   /* If the draw_info provides us with a buffer rather than a
+* user pointer, Check to see if we've already translated that buffer
+*/
+   if (src_sbuf && !screen->debug.no_cache_index_buffers) {
   /* Check if we already have a translated index buffer */
   if (src_sbuf->translated_indices.buffer &&
-  src_sbuf->translated_indices.orig_prim == orig_prim &&
+  src_sbuf->translated_indices.orig_prim == info->mode &&
   

[Mesa-dev] [PATCH 2/6] winsys/svga: Make it possible to specify coherent resources

2019-06-19 Thread VMware
From: Thomas Hellstrom 

Add a flag in the surface cache key and a winsys usage flag to
specify coherent memory.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
 src/gallium/drivers/svga/svga_screen_cache.c   |  2 ++
 src/gallium/drivers/svga/svga_screen_cache.h   |  1 +
 src/gallium/drivers/svga/svga_winsys.h |  7 +--
 src/gallium/winsys/svga/drm/vmw_buffer.c   |  4 ++--
 src/gallium/winsys/svga/drm/vmw_screen_ioctl.c |  3 ++-
 src/gallium/winsys/svga/drm/vmw_surface.c  | 18 ++
 6 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_screen_cache.c 
b/src/gallium/drivers/svga/svga_screen_cache.c
index e2282d6a15c..1916a64245c 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -550,6 +550,8 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
  usage |= SVGA_SURFACE_USAGE_SHARED;
   if (key->scanout)
  usage |= SVGA_SURFACE_USAGE_SCANOUT;
+  if (key->coherent)
+ usage |= SVGA_SURFACE_USAGE_COHERENT;
 
   handle = sws->surface_create(sws,
key->flags,
diff --git a/src/gallium/drivers/svga/svga_screen_cache.h 
b/src/gallium/drivers/svga/svga_screen_cache.h
index a239b761fc2..055a267c189 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.h
+++ b/src/gallium/drivers/svga/svga_screen_cache.h
@@ -69,6 +69,7 @@ struct svga_host_surface_cache_key
uint32_t cachable:1; /* False if this is a shared surface */
uint32_t sampleCount:5;
uint32_t scanout:1;
+   uint32_t coherent:1;
 };
 
 
diff --git a/src/gallium/drivers/svga/svga_winsys.h 
b/src/gallium/drivers/svga/svga_winsys.h
index ee39db3bbbc..30d3f8776d9 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -81,8 +81,9 @@ struct winsys_handle;
 #define SVGA_FENCE_FLAG_EXEC  (1 << 0)
 #define SVGA_FENCE_FLAG_QUERY (1 << 1)
 
-#define SVGA_SURFACE_USAGE_SHARED  (1 << 0)
-#define SVGA_SURFACE_USAGE_SCANOUT (1 << 1)
+#define SVGA_SURFACE_USAGE_SHARED   (1 << 0)
+#define SVGA_SURFACE_USAGE_SCANOUT  (1 << 1)
+#define SVGA_SURFACE_USAGE_COHERENT (1 << 2)
 
 #define SVGA_QUERY_FLAG_SET(1 << 0)
 #define SVGA_QUERY_FLAG_REF(1 << 1)
@@ -677,6 +678,8 @@ struct svga_winsys_screen
/** Can we do DMA with guest-backed objects enabled? */
bool have_gb_dma;
 
+   /** Do we support coherent surface memory? */
+   bool have_coherent;
/**
 * Create and define a GB shader.
 */
diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.c 
b/src/gallium/winsys/svga/drm/vmw_buffer.c
index 91b5b259435..34c5e341782 100644
--- a/src/gallium/winsys/svga/drm/vmw_buffer.c
+++ b/src/gallium/winsys/svga/drm/vmw_buffer.c
@@ -315,7 +315,7 @@ vmw_svga_winsys_buffer_wrap(struct pb_buffer *buffer)
}
 
buf->pb_buf = buffer;
-   buf->fbuf = debug_flush_buf_create(TRUE, VMW_DEBUG_FLUSH_STACK);
+   buf->fbuf = debug_flush_buf_create(FALSE, VMW_DEBUG_FLUSH_STACK);
return buf;
 }
 
@@ -365,7 +365,7 @@ vmw_svga_winsys_buffer_map(struct svga_winsys_screen *sws,
STATIC_ASSERT((unsigned) PB_USAGE_UNSYNCHRONIZED ==
  (unsigned) PIPE_TRANSFER_UNSYNCHRONIZED);
 
-   map = pb_map(vmw_pb_buffer(buf), flags, NULL);
+   map = pb_map(vmw_pb_buffer(buf), flags & PB_USAGE_ALL, NULL);
 
 #ifdef DEBUG
if (map != NULL)
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c 
b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
index a02d31c2bcb..2e84c811e82 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
@@ -246,7 +246,7 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
   if (usage & SVGA_SURFACE_USAGE_SHARED)
  req->base.drm_surface_flags |= drm_vmw_surface_flag_shareable;
 
-  if (vws->force_coherent)
+  if ((usage & SVGA_SURFACE_USAGE_COHERENT) || vws->force_coherent)
  req->base.drm_surface_flags |= drm_vmw_surface_flag_coherent;
 
   req->base.drm_surface_flags |= drm_vmw_surface_flag_create_buffer;
@@ -1117,6 +1117,7 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
  vws->ioctl.num_cap_3d = SVGA3D_DEVCAP_MAX;
 
   if (have_drm_2_16) {
+ vws->base.have_coherent = TRUE;
  getenv_val = getenv("SVGA_FORCE_COHERENT");
  if (getenv_val && strcmp(getenv_val, "0") != 0)
 vws->force_coherent = TRUE;
diff --git a/src/gallium/winsys/svga/drm/vmw_surface.c 
b/src/gallium/winsys/svga/drm/vmw_surface.c
index 69408ffe9d9..6aa09e11b76 100644
--- a/src/gallium/winsys/svga/drm/vmw_surface.c
+++ b/src/gallium/winsys/svga/drm/vmw_surface.c
@@ -45,21 +45,14 @@ vmw_svga_winsys_surface_map(struct svga_winsys_context *swc,
struct pb_buffer *pb_buf;
uint32_t pb_flags;
struct vmw_winsys_screen *vws = vsrf->screen;
-   
+
*retry = FALSE;
assert((flags & (PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE)) != 

[Mesa-dev] [PATCH 5/6] gallium/util: Make it possible to disable persistent maps in the upload manager

2019-06-19 Thread VMware
From: Thomas Hellstrom 

For svga, the use of persistent / coherent maps is typically slightly
slower than without them. It's probably a bit case-dependent and
possible to tune, but for now, make sure we can disable those.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
 src/gallium/auxiliary/util/u_upload_mgr.c | 14 --
 src/gallium/auxiliary/util/u_upload_mgr.h |  4 
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c 
b/src/gallium/auxiliary/util/u_upload_mgr.c
index c2c0ba957e3..73f6cae0b6d 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -106,8 +106,10 @@ u_upload_clone(struct pipe_context *pipe, struct 
u_upload_mgr *upload)
struct u_upload_mgr *result = u_upload_create(pipe, upload->default_size,
  upload->bind, upload->usage,
  upload->flags);
-   if (upload->map_persistent &&
-   upload->map_flags & PIPE_TRANSFER_FLUSH_EXPLICIT)
+   if (!upload->map_persistent && result->map_persistent)
+  u_upload_disable_persistent(result);
+   else if (upload->map_persistent &&
+upload->map_flags & PIPE_TRANSFER_FLUSH_EXPLICIT)
   u_upload_enable_flush_explicit(result);
 
return result;
@@ -121,6 +123,14 @@ u_upload_enable_flush_explicit(struct u_upload_mgr *upload)
upload->map_flags |= PIPE_TRANSFER_FLUSH_EXPLICIT;
 }
 
+void
+u_upload_disable_persistent(struct u_upload_mgr *upload)
+{
+   upload->map_persistent = FALSE;
+   upload->map_flags &= ~(PIPE_TRANSFER_COHERENT | PIPE_TRANSFER_PERSISTENT);
+   upload->map_flags |= PIPE_TRANSFER_FLUSH_EXPLICIT;
+}
+
 static void
 upload_unmap_internal(struct u_upload_mgr *upload, boolean destroying)
 {
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h 
b/src/gallium/auxiliary/util/u_upload_mgr.h
index 80832016272..6a4a60963fe 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -73,6 +73,10 @@ u_upload_clone(struct pipe_context *pipe, struct 
u_upload_mgr *upload);
 void
 u_upload_enable_flush_explicit(struct u_upload_mgr *upload);
 
+/** Whether to avoid persistent mappings where available */
+void
+u_upload_disable_persistent(struct u_upload_mgr *upload);
+
 /**
  * Destroy the upload manager.
  */
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/6] svga: Support ARB_buffer_storage

2019-06-19 Thread VMware
From: Thomas Hellstrom 

This basically boils down to supporting persistent and coherent buffer
storage.
We chose to use coherent buffer storage for all persistent buffers
even if it's not explicitly specified, since using glMemoryBarrier to
obtain coherency would be particularly expensive in our driver stack,
and require a lot of additional bookkeeping.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
 src/gallium/drivers/svga/svga_context.c |  6 ++
 src/gallium/drivers/svga/svga_resource_buffer.c | 13 +
 .../drivers/svga/svga_resource_buffer_upload.c  | 17 +
 .../drivers/svga/svga_resource_texture.c|  3 +++
 src/gallium/drivers/svga/svga_screen.c  |  3 ++-
 5 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_context.c 
b/src/gallium/drivers/svga/svga_context.c
index 57c0dc49957..104a551356d 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -148,12 +148,16 @@ svga_context_create(struct pipe_screen *screen, void 
*priv, unsigned flags)
if (!svga->pipe.stream_uploader)
   goto cleanup;
 
+   u_upload_disable_persistent(svga->pipe.stream_uploader);
+
svga->pipe.const_uploader = u_upload_create(>pipe, 128 * 1024,
PIPE_BIND_CONSTANT_BUFFER,
PIPE_USAGE_STREAM, 0);
if (!svga->pipe.const_uploader)
   goto cleanup;
 
+   u_upload_disable_persistent(svga->pipe.const_uploader);
+
svga->swc = svgascreen->sws->context_create(svgascreen->sws);
if (!svga->swc)
   goto cleanup;
@@ -236,6 +240,8 @@ svga_context_create(struct pipe_screen *screen, void *priv, 
unsigned flags)
if (!svga->const0_upload)
   goto cleanup;
 
+   u_upload_disable_persistent(svga->const0_upload);
+
if (!svga_texture_transfer_map_upload_create(svga))
   goto cleanup;
 
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c 
b/src/gallium/drivers/svga/svga_resource_buffer.c
index 234d825d5a2..712fffc83d3 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -71,6 +71,9 @@ svga_buffer_needs_hw_storage(const struct svga_screen *ss,
  bind_mask |= PIPE_BIND_CONSTANT_BUFFER;
}
 
+   if (template->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
+  return TRUE;
+
return !!(template->bind & bind_mask);
 }
 
@@ -126,7 +129,8 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
   pipe_resource_reference(>translated_indices.buffer, NULL);
}
 
-   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
+   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty &&
+   !sbuf->key.coherent && !svga->swc->force_coherent) {
   enum pipe_error ret;
 
   /* Host-side buffers can only be dirtied with vgpu10 features
@@ -160,7 +164,8 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
}
 
if (usage & PIPE_TRANSFER_WRITE) {
-  if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+  if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
+  !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)) {
  /*
   * Flush any pending primitives, finish writing any pending DMA
   * commands, and tell the host to discard the buffer contents on
@@ -317,7 +322,7 @@ svga_buffer_transfer_flush_region(struct pipe_context *pipe,
assert(transfer->usage & PIPE_TRANSFER_WRITE);
assert(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT);
 
-   if (!svga->swc->force_coherent || sbuf->swbuf) {
+   if (!(svga->swc->force_coherent || sbuf->key.coherent) || sbuf->swbuf) {
   mtx_lock(>swc_mutex);
   svga_buffer_add_range(sbuf, offset, offset + length);
   mtx_unlock(>swc_mutex);
@@ -361,7 +366,7 @@ svga_buffer_transfer_unmap(struct pipe_context *pipe,
 
  sbuf->dma.flags.discard = TRUE;
 
- if (!svga->swc->force_coherent || sbuf->swbuf)
+ if (!(svga->swc->force_coherent || sbuf->key.coherent) || sbuf->swbuf)
 svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0);
   }
}
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c 
b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 64f75231a65..1bb7431abf4 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -183,6 +183,14 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
  sbuf->key.flags = SVGA3D_SURFACE_TRANSFER_FROM_BUFFER;
   }
 
+  if (sbuf->b.b.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
+ /* This surface can be mapped persistently. We use
+  * coherent memory to avoid implementing memory barriers for
+  * persistent non-coherent memory for now.
+  */
+ sbuf->key.coherent = 1;
+  }
+
   sbuf->key.size.width = sbuf->b.b.width0;
   sbuf->key.size.height = 1;
   

[Mesa-dev] [PATCH 2/2] nir/loop_analyze: handle swizzles on the loop limit

2019-06-19 Thread Timothy Arceri
Fixes: 6772a17acc8e ("nir: Add a loop analysis pass")
---
 src/compiler/nir/nir_loop_analyze.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 57d2d94cad2..79ed5bb2712 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -561,8 +561,10 @@ try_find_limit_of_alu(nir_alu_src *limit, nir_const_value 
*limit_val,
   if (limit->src.ssa->parent_instr->type != nir_instr_type_load_const)
  return false;
 
+  unsigned limit_swz = limit->swizzle[0];
   *limit_val =
- nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];
+ nir_instr_as_load_const(limit->src.ssa->parent_instr)
+->value[limit_swz];
 
   terminator->exact_trip_count_unknown = true;
 
@@ -933,8 +935,10 @@ find_trip_count(loop_info_state *state)
   /* Attempt to find a constant limit for the loop */
   nir_const_value limit_val;
   if (limit->src.ssa->parent_instr->type == nir_instr_type_load_const) {
+ unsigned limit_swz = limit->swizzle[0];
  limit_val =
-nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];
+nir_instr_as_load_const(limit->src.ssa->parent_instr)
+   ->value[limit_swz];
   } else {
  trip_count_known = false;
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] nir/loop_analyze: used nir_alu_src to track loop limit

2019-06-19 Thread Timothy Arceri
This helps reduce the amount of abstraction in this pass and allows
us to retain more information about the src such as any swizzles.
Retaining the swizzle information is required for a bugfix in the
following patch.

Fixes: 6772a17acc8e ("nir: Add a loop analysis pass")
---
 src/compiler/nir/nir_loop_analyze.c | 37 +++--
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index e85a404da1b..57d2d94cad2 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -543,25 +543,26 @@ guess_loop_limit(loop_info_state *state, nir_const_value 
*limit_val,
 }
 
 static bool
-try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value *limit_val,
-  nir_loop_terminator *terminator, loop_info_state *state)
+try_find_limit_of_alu(nir_alu_src *limit, nir_const_value *limit_val,
+  nir_loop_terminator *terminator)
 {
-   if(!is_var_alu(limit))
+   if(limit->src.ssa->parent_instr->type != nir_instr_type_alu)
   return false;
 
-   nir_alu_instr *limit_alu = nir_instr_as_alu(limit->def->parent_instr);
+   nir_alu_instr *limit_alu = nir_instr_as_alu(limit->src.ssa->parent_instr);
 
if (limit_alu->op == nir_op_imin ||
limit_alu->op == nir_op_fmin) {
-  limit = get_loop_var(limit_alu->src[0].src.ssa, state);
+  limit = &limit_alu->src[0];
 
-  if (!is_var_constant(limit))
- limit = get_loop_var(limit_alu->src[1].src.ssa, state);
+  if (limit->src.ssa->parent_instr->type != nir_instr_type_load_const)
+ limit = &limit_alu->src[1];
 
-  if (!is_var_constant(limit))
+  if (limit->src.ssa->parent_instr->type != nir_instr_type_load_const)
  return false;
 
-  *limit_val = nir_instr_as_load_const(limit->def->parent_instr)->value[0];
+  *limit_val =
+ nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];
 
   terminator->exact_trip_count_unknown = true;
 
@@ -777,19 +778,19 @@ is_supported_terminator_condition(nir_alu_instr *alu)
 
 static bool
 get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
- nir_loop_variable **limit,
+ nir_alu_src **limit,
  loop_info_state *state)
 {
bool limit_rhs = true;
 
/* We assume that the limit is the "right" operand */
*ind = get_loop_var(alu->src[0].src.ssa, state);
-   *limit = get_loop_var(alu->src[1].src.ssa, state);
+   *limit = &alu->src[1];
 
if ((*ind)->type != basic_induction) {
   /* We had it the wrong way, flip things around */
   *ind = get_loop_var(alu->src[1].src.ssa, state);
-  *limit = get_loop_var(alu->src[0].src.ssa, state);
+  *limit = &alu->src[0];
   limit_rhs = false;
}
 
@@ -799,7 +800,7 @@ get_induction_and_limit_vars(nir_alu_instr *alu, 
nir_loop_variable **ind,
 static void
 try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
  nir_loop_variable **ind,
- nir_loop_variable **limit,
+ nir_alu_src **limit,
  bool *limit_rhs,
  loop_info_state *state)
 {
@@ -848,7 +849,7 @@ try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
 
/* Try the other iand src if needed */
if (*ind == NULL || (*ind && (*ind)->type != basic_induction) ||
-   !is_var_constant(*limit)) {
+   (*limit)->src.ssa->parent_instr->type != nir_instr_type_load_const) {
   src = iand->src[1].src.ssa;
   if (src->parent_instr->type == nir_instr_type_alu) {
  nir_alu_instr *tmp_alu = nir_instr_as_alu(src->parent_instr);
@@ -891,7 +892,7 @@ find_trip_count(loop_info_state *state)
 
   bool limit_rhs;
   nir_loop_variable *basic_ind = NULL;
-  nir_loop_variable *limit;
+  nir_alu_src *limit;
   if (alu->op == nir_op_inot || alu->op == nir_op_ieq) {
  nir_alu_instr *new_alu = alu;
  try_find_trip_count_vars_in_iand(&new_alu, &basic_ind, &limit,
@@ -931,13 +932,13 @@ find_trip_count(loop_info_state *state)
 
   /* Attempt to find a constant limit for the loop */
   nir_const_value limit_val;
-  if (is_var_constant(limit)) {
+  if (limit->src.ssa->parent_instr->type == nir_instr_type_load_const) {
  limit_val =
-nir_instr_as_load_const(limit->def->parent_instr)->value[0];
+nir_instr_as_load_const(limit->src.ssa->parent_instr)->value[0];
   } else {
  trip_count_known = false;
 
- if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) {
+ if (!try_find_limit_of_alu(limit, &limit_val, terminator)) {
 /* Guess loop limit based on array access */
 if (!guess_loop_limit(state, &limit_val, basic_ind)) {
continue;
-- 
2.21.0

___
mesa-dev mailing list

[Mesa-dev] [Bug 110673] amdgpu hevc encoding problems: segment fault and contents of garbage

2019-06-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110673

--- Comment #6 from zhoulei  ---
(In reply to Boyuan Zhang from comment #5)
> Hi Zhoulei,
> 
> I fixed the second issue you reported (cbr/vbr corruption for hevc encoding).
> 
> Please give a try using this patch:
> https://lists.freedesktop.org/archives/mesa-dev/2019-June/220304.html
> 
> Thanks,
> Boyuan

Hi Boyuan:

I have tested these patches on RX580, it works.

In addition:

The changes in radeon_vcn_enc_1_2.c cannot be applied.

It looks like a typo.

RENC_UVD_RATE_CONTROL_METHOD_NONE, radeon_uvd_enc_code_fixed_bits and

radeon_uvd_enc_code_ue are defined in radeon_uvd_enc.h.

The patch should use the macros and functions defined in radeon_vcn_enc.h instead.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: implement compressed FMASK texture reads with RADV_PERFTEST=tccompatcmask

2019-06-19 Thread Bas Nieuwenhuizen
Oops. No, r-b

On Wed, Jun 19, 2019, 9:48 AM Samuel Pitoiset 
wrote:

>
> On 6/17/19 12:24 PM, Bas Nieuwenhuizen wrote:
>
>
>
>
> On Thu, Jun 13, 2019, 3:42 PM Samuel Pitoiset 
> wrote:
>
>> This allows us to disable the FMASK decompress pass when
>> transitioning from CB writes to shader reads.
>>
>> This will likely be improved and enabled by default in the future.
>>
>> No CTS regressions on GFX8 but a few number of multisample CTS
>> failures on GFX9 (they look related to the small hint).
>>
>> Signed-off-by: Samuel Pitoiset 
>> ---
>>  src/amd/vulkan/radv_cmd_buffer.c  |  9 ++
>>  src/amd/vulkan/radv_debug.h   |  1 +
>>  src/amd/vulkan/radv_device.c  | 15 ++
>>  src/amd/vulkan/radv_image.c   | 42 +++
>>  src/amd/vulkan/radv_meta.h| 26 +
>>  src/amd/vulkan/radv_meta_fast_clear.c |  2 +-
>>  src/amd/vulkan/radv_private.h | 10 +++
>>  7 files changed, 104 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
>> b/src/amd/vulkan/radv_cmd_buffer.c
>> index 2fd5f8b7a07..bf208899887 100644
>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> @@ -1254,6 +1254,15 @@ radv_emit_fb_color_state(struct radv_cmd_buffer
>> *cmd_buffer,
>> cb_color_info &= C_028C70_DCC_ENABLE;
>> }
>>
>> +   if (radv_image_is_tc_compat_cmask(image) &&
>> +   (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
>> +radv_is_dcc_decompress_pipeline(cmd_buffer))) {
>> +   /* If this bit is set, the FMASK decompression operation
>> +* doesn't occur (DCC_COMPRESS also implies
>> FMASK_DECOMPRESS).
>> +*/
>> +   cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
>> +   }
>> +
>> if (cmd_buffer->device->physical_device->rad_info.chip_class >=
>> GFX9) {
>> radeon_set_context_reg_seq(cmd_buffer->cs,
>> R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
>> radeon_emit(cmd_buffer->cs, cb->cb_color_base);
>> diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
>> index 652a3b677d2..29793e549ce 100644
>> --- a/src/amd/vulkan/radv_debug.h
>> +++ b/src/amd/vulkan/radv_debug.h
>> @@ -61,6 +61,7 @@ enum {
>> RADV_PERFTEST_OUT_OF_ORDER   =   0x8,
>> RADV_PERFTEST_DCC_MSAA   =  0x10,
>> RADV_PERFTEST_BO_LIST=  0x20,
>> +   RADV_PERFTEST_TC_COMPAT_CMASK = 0x40,
>>  };
>>
>>  bool
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index 3b69e457496..b75ce59dfc3 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -479,6 +479,7 @@ static const struct debug_control
>> radv_perftest_options[] = {
>> {"localbos", RADV_PERFTEST_LOCAL_BOS},
>> {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
>> {"bolist", RADV_PERFTEST_BO_LIST},
>> +   {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
>> {NULL, 0}
>>  };
>>
>> @@ -4389,6 +4390,20 @@ radv_initialise_color_surface(struct radv_device
>> *device,
>> unsigned fmask_bankh =
>> util_logbase2(iview->image->fmask.bank_height);
>> cb->cb_color_attrib |=
>> S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
>> }
>> +
>> +   if (radv_image_is_tc_compat_cmask(iview->image)) {
>> +   /* Allow the texture block to read FMASK directly
>> +* without decompressing it. This bit must be
>> cleared
>> +* when performing FMASK_DECOMPRESS or
>> DCC_COMPRESS,
>> +* otherwise the operation doesn't happen.
>> +*/
>> +   cb->cb_color_info |=
>> S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
>> +
>> +   /* Set CMASK into a tiling format that allows the
>> +* texture block to read it.
>> +*/
>> +   cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
>> +   }
>> }
>>
>> if (radv_image_has_cmask(iview->image) &&
>> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
>> index d8dc2dfabde..c58c08fca59 100644
>> --- a/src/amd/vulkan/radv_image.c
>> +++ b/src/amd/vulkan/radv_image.c
>> @@ -219,6 +219,29 @@ radv_use_dcc_for_image(struct radv_device *device,
>> return true;
>>  }
>>
>> +static bool
>> +radv_use_tc_compat_cmask_for_image(struct radv_device *device,
>> +  struct radv_image *image)
>> +{
>
> +   if (!(device->instance->perftest_flags &
>> RADV_PERFTEST_TC_COMPAT_CMASK))
>> +   return false;
>> +
>> +   /* TC-compat CMASK is only available for GFX8+. */
>> +   if (device->physical_device->rad_info.chip_class < GFX8)
>> +   return false;
>> +
>> +   if (image->usage & 

Re: [Mesa-dev] [PATCH] radv: implement compressed FMASK texture reads with RADV_PERFTEST=tccompatcmask

2019-06-19 Thread Samuel Pitoiset


On 6/17/19 12:24 PM, Bas Nieuwenhuizen wrote:




On Thu, Jun 13, 2019, 3:42 PM Samuel Pitoiset 
<samuel.pitoi...@gmail.com> wrote:


This allows us to disable the FMASK decompress pass when
transitioning from CB writes to shader reads.

This will likely be improved and enabled by default in the future.

No CTS regressions on GFX8 but a few number of multisample CTS
failures on GFX9 (they look related to the small hint).

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/amd/vulkan/radv_cmd_buffer.c      |  9 ++
 src/amd/vulkan/radv_debug.h           |  1 +
 src/amd/vulkan/radv_device.c          | 15 ++
 src/amd/vulkan/radv_image.c           | 42
+++
 src/amd/vulkan/radv_meta.h            | 26 +
 src/amd/vulkan/radv_meta_fast_clear.c |  2 +-
 src/amd/vulkan/radv_private.h         | 10 +++
 7 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c
b/src/amd/vulkan/radv_cmd_buffer.c
index 2fd5f8b7a07..bf208899887 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1254,6 +1254,15 @@ radv_emit_fb_color_state(struct
radv_cmd_buffer *cmd_buffer,
                cb_color_info &= C_028C70_DCC_ENABLE;
        }

+       if (radv_image_is_tc_compat_cmask(image) &&
+           (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
+            radv_is_dcc_decompress_pipeline(cmd_buffer))) {
+               /* If this bit is set, the FMASK decompression
operation
+                * doesn't occur (DCC_COMPRESS also implies
FMASK_DECOMPRESS).
+                */
+               cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
+       }
+
        if
(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE
+ index * 0x3c, 11);
                radeon_emit(cmd_buffer->cs, cb->cb_color_base);
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 652a3b677d2..29793e549ce 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -61,6 +61,7 @@ enum {
        RADV_PERFTEST_OUT_OF_ORDER   =   0x8,
        RADV_PERFTEST_DCC_MSAA       =  0x10,
        RADV_PERFTEST_BO_LIST        =  0x20,
+       RADV_PERFTEST_TC_COMPAT_CMASK = 0x40,
 };

 bool
diff --git a/src/amd/vulkan/radv_device.c
b/src/amd/vulkan/radv_device.c
index 3b69e457496..b75ce59dfc3 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -479,6 +479,7 @@ static const struct debug_control
radv_perftest_options[] = {
        {"localbos", RADV_PERFTEST_LOCAL_BOS},
        {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
        {"bolist", RADV_PERFTEST_BO_LIST},
+       {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
        {NULL, 0}
 };

@@ -4389,6 +4390,20 @@ radv_initialise_color_surface(struct
radv_device *device,
                        unsigned fmask_bankh =
util_logbase2(iview->image->fmask.bank_height);
                        cb->cb_color_attrib |=
S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
                }
+
+               if (radv_image_is_tc_compat_cmask(iview->image)) {
+                       /* Allow the texture block to read FMASK
directly
+                        * without decompressing it. This bit must
be cleared
+                        * when performing FMASK_DECOMPRESS or
DCC_COMPRESS,
+                        * otherwise the operation doesn't happen.
+                        */
+                       cb->cb_color_info |=
S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
+
+                       /* Set CMASK into a tiling format that
allows the
+                        * texture block to read it.
+                        */
+                       cb->cb_color_info |=
S_028C70_CMASK_ADDR_TYPE(2);
+               }
        }

        if (radv_image_has_cmask(iview->image) &&
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index d8dc2dfabde..c58c08fca59 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -219,6 +219,29 @@ radv_use_dcc_for_image(struct radv_device
*device,
        return true;
 }

+static bool
+radv_use_tc_compat_cmask_for_image(struct radv_device *device,
+                                  struct radv_image *image)
+{

+       if (!(device->instance->perftest_flags &
RADV_PERFTEST_TC_COMPAT_CMASK))
+               return false;
+
+       /* TC-compat CMASK is only available for GFX8+. */
+       if (device->physical_device->rad_info.chip_class < GFX8)

Re: [Mesa-dev] [PATCH 5/6] radv: do not fast clears if one level can't be fast cleared

2019-06-19 Thread Samuel Pitoiset


On 6/19/19 1:38 AM, Bas Nieuwenhuizen wrote:

On Tue, Jun 18, 2019 at 4:12 PM Samuel Pitoiset
 wrote:

And fall back to slow color clears.

Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/radv_meta_clear.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index 7f8f69e10f6..ed7f19b1dce 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -1515,6 +1515,20 @@ radv_can_fast_clear_color(struct radv_cmd_buffer 
*cmd_buffer,
 if (!can_avoid_fast_clear_elim)
 return false;
 }
+
+   if (iview->image->info.levels > 1) {
+   for (uint32_t l = 0; l < iview->level_count; l++) {
+   uint32_t level = iview->base_mip + l;
+   struct legacy_surf_level *surf_level =
+   
&iview->image->planes[0].surface.u.legacy.level[level];

Same here, not comfortable accessing legacy data on GFX9.


Yeah, I will change that.

Thanks for the reviews Bas!


+
+   /* Do not fast clears if one level can't be
+* fast cleared.
+*/
+   if (!surf_level->dcc_fast_clear_size)
+   return false;
+   }
+   }
 }

 return true;
--
2.22.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110922] [regression][bisected] Android build test fails to include libmesa_winsys_virgl_common

2019-06-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110922

--- Comment #1 from Tapani Pälli  ---
Fix posted:
https://lists.freedesktop.org/archives/mesa-dev/2019-June/220233.html

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] android: virgl: fix libmesa_virgil_common build and dependencies

2019-06-19 Thread Tapani Pälli



On 6/15/19 8:42 AM, Mauro Rossi wrote:

Hi,
there is a typo in the commit title, the library is 
libmesa_winsys_virgl_common

I will correct it in the final commit


Reviewed-by: Tapani Pälli 


Mauro

On Sat, Jun 15, 2019 at 7:39 AM Mauro Rossi <issor.or...@gmail.com> wrote:


Fixes the following build errors and resolves Bug 110922
Fixes gallium_dri target missing symbols at linking.

external/mesa/src/gallium/winsys/virgl/drm/Android.mk:
error: libmesa_winsys_virgl (STATIC_LIBRARIES android-x86_64)
missing libmesa_winsys_virgl_common (STATIC_LIBRARIES android-x86_64)
...
external/mesa/src/gallium/winsys/virgl/vtest/Android.mk:
error: libmesa_winsys_virgl_vtest (STATIC_LIBRARIES android-x86_64)
missing libmesa_winsys_virgl_common (STATIC_LIBRARIES android-x86_64)
...
build/core/main.mk:728 : error: exiting from
previous errors.

In file included from
external/mesa/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c:34:
external/mesa/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h:35:10:
fatal error: 'virgl_resource_cache.h' file not found
          ^~~~
1 error generated.

In file included from
external/mesa/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c:32:
external/mesa/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h:35:10:
fatal error: 'virgl_resource_cache.h' file not found
#include "virgl_resource_cache.h"
          ^~~~
1 error generated.

Fixes: b18f09a ("virgl: Introduce virgl_resource_cache")
Signed-off-by: Mauro Rossi <issor.or...@gmail.com>
---
  src/gallium/Android.mk                    | 2 +-
  src/gallium/drivers/virgl/Android.mk      | 2 +-
  src/gallium/winsys/virgl/drm/Android.mk   | 2 ++
  src/gallium/winsys/virgl/vtest/Android.mk | 2 ++
  4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index 3a3f042c7a..37e923c225 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -43,7 +43,7 @@ SUBDIRS += winsys/radeon/drm drivers/r300
  SUBDIRS += winsys/radeon/drm drivers/r600
  SUBDIRS += winsys/radeon/drm winsys/amdgpu/drm drivers/radeonsi
  SUBDIRS += winsys/vc4/drm drivers/vc4
-SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl
+SUBDIRS += winsys/virgl/common winsys/virgl/drm winsys/virgl/vtest
drivers/virgl
  SUBDIRS += winsys/svga/drm drivers/svga
  SUBDIRS += winsys/etnaviv/drm drivers/etnaviv drivers/renderonly
  SUBDIRS += state_trackers/dri
diff --git a/src/gallium/drivers/virgl/Android.mk
b/src/gallium/drivers/virgl/Android.mk
index 0067dfa702..a6fe53fbe9 100644
--- a/src/gallium/drivers/virgl/Android.mk
+++ b/src/gallium/drivers/virgl/Android.mk
@@ -35,5 +35,5 @@ include $(BUILD_STATIC_LIBRARY)

  ifneq ($(HAVE_GALLIUM_VIRGL),)
  GALLIUM_TARGET_DRIVERS += virtio_gpu
-$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_virgl
libmesa_winsys_virgl_vtest)
+$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_virgl_common
libmesa_winsys_virgl libmesa_winsys_virgl_vtest)
  endif
diff --git a/src/gallium/winsys/virgl/drm/Android.mk
b/src/gallium/winsys/virgl/drm/Android.mk
index 5e2500774e..398a7645bc 100644
--- a/src/gallium/winsys/virgl/drm/Android.mk
+++ b/src/gallium/winsys/virgl/drm/Android.mk
@@ -27,6 +27,8 @@ include $(CLEAR_VARS)

  LOCAL_SRC_FILES := $(C_SOURCES)

+LOCAL_C_INCLUDES := $(GALLIUM_TOP)/winsys/virgl/common
+
  LOCAL_MODULE := libmesa_winsys_virgl

  LOCAL_STATIC_LIBRARIES := libmesa_winsys_virgl_common
diff --git a/src/gallium/winsys/virgl/vtest/Android.mk
b/src/gallium/winsys/virgl/vtest/Android.mk
index 5b33f67711..6d35223c8e 100644
--- a/src/gallium/winsys/virgl/vtest/Android.mk
+++ b/src/gallium/winsys/virgl/vtest/Android.mk
@@ -27,6 +27,8 @@ include $(CLEAR_VARS)

  LOCAL_SRC_FILES := $(C_SOURCES)

+LOCAL_C_INCLUDES := $(GALLIUM_TOP)/winsys/virgl/common
+
  LOCAL_MODULE := libmesa_winsys_virgl_vtest

  LOCAL_STATIC_LIBRARIES := libmesa_winsys_virgl_common
-- 
2.20.1



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev