date:20180308

Forgot it again 

Fixes: b6347807a9 "radv: Generate icd files."
---
 src/amd/vulkan/Makefile.am | 17 +++--
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am
index 80937e38d3..99d9e578d8 100644
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -142,10 +142,9 @@ BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES) dev_icd.json radeon_icd.@host_cpu@.json
 EXTRA_DIST = \
$(top_srcdir)/include/vulkan/vk_icd.h \
-   dev_icd.json.in \
-   radeon_icd.json.in \
radv_entrypoints_gen.py \
radv_extensions.py \
+   radv_icd.py \
vk_format_layout.csv \
vk_format_parse.py \
vk_format_table.py \
@@ -169,14 +168,12 @@ icdconf_DATA = radeon_icd.@host_cpu@.json
 # The following is used for development purposes, by setting VK_ICD_FILENAMES.
 noinst_DATA = dev_icd.json
 
-dev_icd.json : dev_icd.json.in
-   $(AM_V_GEN) $(SED) \
-   -e 
"s#@libvulkan_radeon_path@#${abs_top_builddir}/${LIB_DIR}/libvulkan_radeon.so#" 
\
-   < $(srcdir)/dev_icd.json.in > $@
+dev_icd.json : radv_extensions.py radv_icd.py
+   $(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_icd.py \
+   --lib-path="${abs_top_builddir}/${LIB_DIR}" --out $@
 
-radeon_icd.@host_cpu@.json : radeon_icd.json.in
-   $(AM_V_GEN) $(SED) \
-   -e "s#@install_libdir@#${libdir}#" \
-   < $(srcdir)/radeon_icd.json.in > $@
+vulkan/radeon_icd.@host_cpu@.json : radv_extensions.py radv_icd.py
+   $(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_icd.py \
+   --lib-path="${libdir}" --out $@
 
 include $(top_srcdir)/install-lib-links.mk
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa: Make gl_vertex_array contain pointers to first order VAO members.

2018-03-08 Thread Mathias . Froehlich

From: Mathias Fröhlich 

Hi Brian,

The updated patch to be sure. The changes with removing the gl prefix lead to
another small change in _tnl_import_array not to have attrib twice in the
argument list. Therefore the v2 again for review.
I did also rename the current values from curr_glattrib to current without
being requested by review.
So, to be sure: your RB is still valid for v2 then?

Plenty thanks for your review these weeks!!
best

Mathias




Instead of keeping a copy of the vertex array content in
struct gl_vertex_array only keep pointers to the first order
information originally in the VAO.
For that represent the current values by struct gl_array_attributes
and struct gl_vertex_buffer_binding.

v2: Change comments.
Remove gl... prefix from variables except in the i965 directory where
it was like that before. Reindent because of that.

Signed-off-by: Mathias Fröhlich 
---
 src/mesa/drivers/dri/i965/brw_context.h   |   2 +-
 src/mesa/drivers/dri/i965/brw_draw.c  |  28 +++---
 src/mesa/drivers/dri/i965/brw_draw_upload.c   | 130 ++
 src/mesa/drivers/dri/i965/genX_state_upload.c |  23 +++--
 src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c  |  81 +---
 src/mesa/main/arrayobj.c  |  16 
 src/mesa/main/attrib.c|   1 -
 src/mesa/main/mtypes.h|  47 +++---
 src/mesa/main/varray.c|  21 -
 src/mesa/main/varray.h|  49 +++---
 src/mesa/state_tracker/st_atom.c  |   7 +-
 src/mesa/state_tracker/st_atom_array.c| 115 ++-
 src/mesa/state_tracker/st_cb_rasterpos.c  |  26 +++---
 src/mesa/state_tracker/st_draw_feedback.c |  46 ++---
 src/mesa/tnl/t_draw.c |  95 ++-
 src/mesa/tnl/t_rebase.c   |  20 ++--
 src/mesa/tnl/t_rebase.h   |   2 +-
 src/mesa/vbo/vbo.h|   4 +-
 src/mesa/vbo/vbo_context.c|  52 +--
 src/mesa/vbo/vbo_exec.c   |  16 ++--
 src/mesa/vbo/vbo_exec_api.c   |  22 ++---
 src/mesa/vbo/vbo_private.h|   3 +-
 src/mesa/vbo/vbo_save_draw.c  |   2 +-
 src/mesa/vbo/vbo_split.c  |   2 +-
 src/mesa/vbo/vbo_split.h  |   4 +-
 src/mesa/vbo/vbo_split_copy.c |  97 +++
 src/mesa/vbo/vbo_split_inplace.c  |   6 +-
 27 files changed, 480 insertions(+), 437 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index d3e7c71207..177273c364 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1459,7 +1459,7 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context 
*ctx);
 
 /* brw_draw_upload.c */
 unsigned brw_get_vertex_surface_type(struct brw_context *brw,
- const struct gl_vertex_array *glarray);
+ const struct gl_array_attributes *glattr);
 
 static inline unsigned
 brw_get_index_type(unsigned index_size)
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 299e7f929e..0d1ae8982c 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -277,7 +277,7 @@ brw_emit_prim(struct brw_context *brw,
 
 static void
 brw_merge_inputs(struct brw_context *brw,
- const struct gl_vertex_array *arrays[])
+ const struct gl_vertex_array *arrays)
 {
const struct gen_device_info *devinfo = >screen->devinfo;
const struct gl_context *ctx = >ctx;
@@ -291,7 +291,7 @@ brw_merge_inputs(struct brw_context *brw,
 
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
   brw->vb.inputs[i].buffer = -1;
-  brw->vb.inputs[i].glarray = arrays[i];
+  brw->vb.inputs[i].glarray = [i];
}
 
if (devinfo->gen < 8 && !devinfo->is_haswell) {
@@ -300,14 +300,16 @@ brw_merge_inputs(struct brw_context *brw,
* 2_10_10_10_REV vertex formats.  Set appropriate workaround flags.
*/
   while (mask) {
+ const struct gl_array_attributes *glattrib;
  uint8_t wa_flags = 0;
 
  i = u_bit_scan64();
+ glattrib = brw->vb.inputs[i].glarray->VertexAttrib;
 
- switch (brw->vb.inputs[i].glarray->Type) {
+ switch (glattrib->Type) {
 
  case GL_FIXED:
-wa_flags = brw->vb.inputs[i].glarray->Size;
+wa_flags = glattrib->Size;
 break;
 
  case GL_INT_2_10_10_10_REV:
@@ -315,12 +317,12 @@ brw_merge_inputs(struct brw_context *brw,
 /* fallthough */
 
  case GL_UNSIGNED_INT_2_10_10_10_REV:
-if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
+if

Re: [Mesa-dev] [PATCH] gallium: silence __builtin_frame_address nonzero argument is unsafe warning

2018-03-08 Thread Jose Fonseca


Makes sense.  Thanks

Reviewed-by: Jose Fonseca 

On 09/03/18 00:00, Timothy Arceri wrote:

Calling __builtin_frame_address with a nonzero argument is unsafe
but is sometimes done for debugging purposes. Since this code is
part of some debug util code I'm assuming that is the case here
and using GCC pragma to silence the warning.

Cc: José Fonseca 
---
  src/gallium/auxiliary/util/u_debug_stack.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_debug_stack.c 
b/src/gallium/auxiliary/util/u_debug_stack.c
index 6ddacdb362..974e639e89 100644
--- a/src/gallium/auxiliary/util/u_debug_stack.c
+++ b/src/gallium/auxiliary/util/u_debug_stack.c
@@ -265,7 +265,10 @@ debug_backtrace_capture(struct debug_stack_frame 
*backtrace,
  #endif
  
  #if defined(PIPE_CC_GCC)

+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wframe-address"
 frame_pointer = ((const void **)__builtin_frame_address(1));
+#pragma GCC diagnostic pop
  #elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
 __asm {
mov frame_pointer, ebp



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105412] [OpenGL] [HD 7790] - VSYNC always ON in all videogames

2018-03-08 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105412

Bug ID: 105412
   Summary: [OpenGL] [HD 7790] - VSYNC always ON in all videogames
   Product: Mesa
   Version: unspecified
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: critical
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: c476...@nwytg.com
QA Contact: mesa-dev@lists.freedesktop.org

Linux 4.13.0-36-generic #40~16.04.1-Ubuntu SMP Fri Feb 16 23:25:58 UTC 2018
x86_64 x86_64 x86_64 GNU/Linux

01:00.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI]
Bonaire XT [Radeon HD 7790/8770 / R7 360 / R9 260/360 OEM] (prog-if 00 [VGA
controller])
Subsystem: Micro-Star International Co., Ltd. [MSI] Bonaire XT [Radeon
HD 7790/8770 / R7 360 / R9 260/360 OEM]
Flags: bus master, fast devsel, latency 0, IRQ 25
Memory at d000 (64-bit, prefetchable) [size=256M]
Memory at cf80 (64-bit, prefetchable) [size=8M]
I/O ports at d000 [size=256]
Memory at f7fc (32-bit, non-prefetchable) [size=256K]
Expansion ROM at 000c [disabled] [size=128K]
Capabilities: [48] Vendor Specific Information: Len=08 
Capabilities: [50] Power Management version 3
Capabilities: [58] Express Legacy Endpoint, MSI 00
Capabilities: [a0] MSI: Enable+ Count=1/1 Maskable- 64bit+
Capabilities: [100] Vendor Specific Information: ID=0001 Rev=1 Len=010

Capabilities: [150] Advanced Error Reporting
Capabilities: [270] #19
Capabilities: [2b0] Address Translation Service (ATS)
Capabilities: [2c0] #13
Capabilities: [2d0] #1b
Kernel driver in use: radeon
Kernel modules: radeon, amdgpu


After updating the Linux kernel (> 4.09), VSYNC is always(!!!) ON in all OpenGL
videogames (max < 60 fps). Please FIX this and set the correct VSYNC mode.
"vblank_mode=0" does NOT resolve this problem!

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: implement multisample image copies

From: Dave Airlie 

It appears it's quite legal to do image copies on multisample
images, however due to a bug in our txf handling and incomplete
tests we never actually noticed we didn't do it properly in radv.

This patch implements a compute shader to copy multiple samples
of an image to another image. It implements the nir txf_ms_mcs
opcode for getting the fmask value, and then uses that value
to either copy sample 0 to all samples, or iterate across
the valid samples copying them.

The shader is inspired by one RE'd from AMDVLK.

Fixes:
dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image*

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c |  54 ++---
 src/amd/vulkan/radv_meta_bufimage.c | 229 ++--
 src/amd/vulkan/radv_meta_copy.c |   2 +-
 src/amd/vulkan/radv_private.h   |   2 +-
 4 files changed, 258 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9b850698608..071f54a5b61 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2277,6 +2277,7 @@ static LLVMValueRef build_tex_intrinsic(struct 
ac_nir_context *ctx,
switch (instr->op) {
case nir_texop_txf:
case nir_texop_txf_ms:
+   case nir_texop_txf_ms_mcs:
case nir_texop_samples_identical:
args->opcode = lod_is_zero ||
   instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
@@ -3417,6 +3418,24 @@ glsl_is_array_image(const struct glsl_type *type)
 }
 
 
+static LLVMValueRef get_fmask_desc_valid(struct ac_llvm_context *ctx,
+   LLVMValueRef fmask_desc_ptr)
+{
+   LLVMValueRef fmask_desc =
+   LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
+ctx->v8i32, "");
+
+   LLVMValueRef fmask_word1 =
+   LLVMBuildExtractElement(ctx->builder, fmask_desc,
+   ctx->i32_1, "");
+
+   LLVMValueRef word1_is_nonzero =
+   LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ fmask_word1, ctx->i32_0, "");
+
+   return word1_is_nonzero;
+}
+
 /* Adjust the sample index according to FMASK.
  *
  * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
@@ -3475,17 +3494,7 @@ static LLVMValueRef 
adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
 * resource descriptor is 0 (invalid),
 */
-   LLVMValueRef fmask_desc =
-   LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
-ctx->v8i32, "");
-
-   LLVMValueRef fmask_word1 =
-   LLVMBuildExtractElement(ctx->builder, fmask_desc,
-   ctx->i32_1, "");
-
-   LLVMValueRef word1_is_nonzero =
-   LLVMBuildICmp(ctx->builder, LLVMIntNE,
- fmask_word1, ctx->i32_0, "");
+   LLVMValueRef word1_is_nonzero = get_fmask_desc_valid(ctx, 
fmask_desc_ptr);
 
/* Replace the MSAA sample index. */
sample_index =
@@ -3518,7 +3527,7 @@ static LLVMValueRef get_image_coords(struct 
ac_nir_context *ctx,
bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
count = image_type_to_components_count(dim, is_array);
 
-   if (is_ms) {
+   if (is_ms && instr->intrinsic == nir_intrinsic_image_load) {
LLVMValueRef fmask_load_address[3];
int chan;
 
@@ -4899,7 +4908,7 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx,
if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, 
*samp_ptr);
}
-   if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
+   if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || 
instr->op == nir_texop_txf_ms_mcs ||
 instr->op == 
nir_texop_samples_identical))
*fmask_ptr = get_sampler_desc(ctx, instr->texture, 
AC_DESC_FMASK, instr, false, false);
 }
@@ -5150,7 +5159,7 @@ static void visit_tex(struct ac_nir_context *ctx, 
nir_tex_instr *instr)
/* Pack LOD */
if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) 
&& !lod_is_zero)) {
address[count++] = lod;
-   } else if (instr->op == nir_texop_txf_ms && sample_index) {
+   } else if ((instr->op == nir_texop_txf_ms || instr->op == 
nir_texop_txf) && sample_index) {
address[count++] = sample_index;
} else if(instr->op == nir_texop_txs) {
count = 0;
@@ -5165,7 +5174,8 @@ static void visit_tex(struct ac_nir_context *ctx, 
nir_tex_instr *instr)
 address[chan], ctx->ac.i32, 
"");

[Mesa-dev] [PATCH] r600: implement callstack workaround for evergreen.

From: Dave Airlie 

This is ported from the sb backend, there are some issues with
evergreen stacks on the boundary between entries and ALU_PUSH_BEFORE
instructions.

Whenever we are going to use a push before, we check the stack
usage and if we have to use the workaround, then we switch to
a separate push.

I noticed this problem dealing with some of the soft fp64 shaders,
in nosb mode, they are quite stack happy.

This fixes all the glitches and inconsistencies I've seen with them

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 39 +++---
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 48750fb..3ca7890 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -377,7 +377,7 @@ struct r600_shader_tgsi_instruction {
 static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct 
pipe_stream_output_info *so, int stream, bool ind);
 static const struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], 
cm_shader_tgsi_instruction[];
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
-static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned 
reason);
+static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
 static int tgsi_else(struct r600_shader_ctx *ctx);
 static int tgsi_endif(struct r600_shader_ctx *ctx);
@@ -393,6 +393,15 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src 
*bc_src,
 static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
   unsigned dst_reg, unsigned mask);
 
+static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx)
+{
+   if (ctx->bc->family == CHIP_HEMLOCK ||
+   ctx->bc->family == CHIP_CYPRESS ||
+   ctx->bc->family == CHIP_JUNIPER)
+   return false;
+   return true;
+}
+
 static bool ctx_has_doubles(struct r600_shader_ctx *ctx)
 {
if (ctx->bc->family == CHIP_ARUBA ||
@@ -10182,7 +10191,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
return 0;
 }
 
-static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
+static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx,
   unsigned reason)
 {
struct r600_stack_info *stack = >bc->stack;
@@ -10200,7 +10209,7 @@ static inline void callstack_update_max_depth(struct 
r600_shader_ctx *ctx,
/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 
elements on
 * the stack must be reserved to hold the current 
active/continue
 * masks */
-   if (reason == FC_PUSH_VPM) {
+   if (reason == FC_PUSH_VPM || stack->push > 0) {
elements += 2;
}
break;
@@ -10226,7 +10235,7 @@ static inline void callstack_update_max_depth(struct 
r600_shader_ctx *ctx,
 *NOTE: it seems we also need to reserve additional element 
in some
 *other cases, e.g. when we have 4 levels of PUSH_VPM in 
the shader,
 *then STACK_SIZE should be 2 instead of 1 */
-   if (reason == FC_PUSH_VPM) {
+   if (reason == FC_PUSH_VPM || stack->push > 0) {
elements += 1;
}
break;
@@ -10245,6 +10254,7 @@ static inline void callstack_update_max_depth(struct 
r600_shader_ctx *ctx,
 
if (entries > stack->max_entries)
stack->max_entries = entries;
+   return elements;
 }
 
 static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
@@ -10268,7 +10278,7 @@ static inline void callstack_pop(struct r600_shader_ctx 
*ctx, unsigned reason)
}
 }
 
-static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
+static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
 {
switch (reason) {
case FC_PUSH_VPM:
@@ -10276,6 +10286,7 @@ static inline void callstack_push(struct 
r600_shader_ctx *ctx, unsigned reason)
break;
case FC_PUSH_WQM:
++ctx->bc->stack.push_wqm;
+   break;
case FC_LOOP:
++ctx->bc->stack.loop;
break;
@@ -10283,7 +10294,7 @@ static inline void callstack_push(struct 
r600_shader_ctx *ctx, unsigned reason)
assert(0);
}
 
-   callstack_update_max_depth(ctx, reason);
+   return callstack_update_max_depth(ctx, reason);
 }
 
 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
@@ -10367,12 +10378,25 @@ static int emit_if(struct r600_shader_ctx *ctx, int 
opcode,

[Mesa-dev] [PATCH] draw: fix alpha value for very short aa lines

2018-03-08 Thread sroland

From: Roland Scheidegger 

The logic would not work correctly for line lengths smaller than 1.0,
even a degenerated line with length 0 would still produce a fragment
with anywhere between alpha 0.0 and 0.5.
---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c  | 25 -
 src/gallium/auxiliary/draw/draw_pipe_stipple.c |  1 -
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c 
b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 14a4b2f..66a943a 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -370,7 +370,30 @@ aaline_line(struct draw_stage *stage, struct prim_header 
*header)
float t_l, t_w;
uint i;
 
-   half_length = 0.5f * sqrtf(dx * dx + dy * dy) + 0.5f;
+   half_length = 0.5f * sqrtf(dx * dx + dy * dy);
+
+   if (half_length < 0.5f) {
+  /*
+   * The logic we use for "normal" sized segments is incorrect
+   * for very short segments (basically because we only have
+   * one value to interpolate, not a distance to each endpoint).
+   * Therefore, we calculate half_length differently, so that for
+   * original line length (near) 0, we get alpha 0 - otherwise
+   * max alpha would still be 0.5. This also prevents us from
+   * artifacts due to degenerated lines (the endpoints being
+   * identical, which would still receive anywhere from alpha
+   * 0-0.5 otherwise) (at least the pstipple stage may generate
+   * such lines due to float inaccuracies if line length is very
+   * close to a integer).
+   * Might not be fully accurate neither (because the "strength" of
+   * the line is going to be determined by how close to the pixel
+   * center those 1 or 2 fragments are) but it's probably the best
+   * we can do.
+   */
+  half_length = 2.0f * half_length;
+   } else {
+  half_length = half_length + 0.5f;
+   }
 
t_w = half_width;
t_l = 0.5f;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c 
b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 3a44e96..d30572c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -150,7 +150,6 @@ stipple_line(struct draw_stage *stage, struct prim_header 
*header)
if (header->flags & DRAW_PIPE_RESET_STIPPLE)
   stipple->counter = 0;
 
-
/* XXX ToDo: instead of iterating pixel-by-pixel, use a look-up table.
 */
for (i = 0; i < length; i++) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] gbm: Add support for 10bpp BGR formats

Series is Tested-by: Ilia Mirkin 

kmscube and weston both start. The terminal app in weston shows
correct colors. I got a (weston) crash when trying to run
egltri_wayland from mesa-demos, but that could be for a million
different reasons.

On Thu, Mar 8, 2018 at 12:36 PM, Daniel Stone  wrote:
> Add support for XBGR2101010 and ABGR2101010 formats.
>
> Signed-off-by: Daniel Stone 
> ---
>  src/gbm/backends/dri/gbm_dri.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
> index df20db40218..b3d6ceb15a3 100644
> --- a/src/gbm/backends/dri/gbm_dri.c
> +++ b/src/gbm/backends/dri/gbm_dri.c
> @@ -580,6 +580,14 @@ static const struct gbm_dri_visual 
> gbm_dri_visuals_table[] = {
>   GBM_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010,
>   { 0x3ff0, 0x000ffc00, 0x03ff, 0xc000 },
> },
> +   {
> + GBM_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XBGR2101010,
> + { 0x03ff, 0x000ffc00, 0x3ff0, 0x },
> +   },
> +   {
> + GBM_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ABGR2101010,
> + { 0x03ff, 0x000ffc00, 0x3ff0, 0xc000 },
> +   },
>  };
>
>  /* The two GBM_BO_FORMAT_[XA]RGB formats alias the GBM_FORMAT_*
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] radv: Update version to 1.1.70.

On 9 March 2018 at 09:52, Bas Nieuwenhuizen  wrote:
> Turns out they did not reset the patch number on release.

Oops and I think I suggested this, sorry!

for the series:

Reviewed-by: Dave Airlie 
> ---
>  src/amd/vulkan/radv_extensions.py | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index 469b09a160..bfee1f76fa 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -31,7 +31,7 @@ import xml.etree.cElementTree as et
>
>  from mako.template import Template
>
> -MAX_API_VERSION = '1.1.0'
> +MAX_API_VERSION = '1.1.70'
>
>  class Extension:
>  def __init__(self, name, ext_version, enable):
> @@ -274,7 +274,7 @@ uint32_t
>  radv_physical_device_api_version(struct radv_physical_device *dev)
>  {
>  if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit)
> -return VK_MAKE_VERSION(1, 1, 0);
> +return VK_MAKE_VERSION(1, 1, 70);
>  return VK_MAKE_VERSION(1, 0, 68);
>  }
>  """)
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/9] swr/rast: Added comment

2018-03-08 Thread Cherniak, Bruce

patch set (1 through 9) Reviewed-by: Bruce Cherniak  

> On Mar 7, 2018, at 7:32 PM, George Kyriazis  wrote:
> 
> ---
> src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp | 1 +
> 1 file changed, 1 insertion(+)
> 
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
> b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
> index 67e415c..6fa60a1 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
> @@ -137,6 +137,7 @@ namespace SwrJit
> }
> else
> {
> +// maskload intrinsic expects integer mask operand in llvm >= 3.8
> mask = BITCAST(mask, VectorType::get(mInt32Ty, mVWidth));
> Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, 
> Intrinsic::x86_avx_maskload_ps_256);
> vResult = BITCAST(CALL(func, { src,mask }), 
> VectorType::get(mInt32Ty, mVWidth));
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v5 4/6] nouveau: Add framebuffer modifier support

Reviewed-by: Ilia Mirkin 

On Thu, Mar 8, 2018 at 8:05 AM, Thierry Reding  wrote:
> From: Thierry Reding 
>
> This adds support for framebuffer modifiers to Nouveau. This will be
> used by the Tegra driver to share metadata about the format of buffers
> (such as the tiling mode or compression).
>
> Changes in v2:
> - remove unused parameters to nouveau_buffer_create()
> - move format modifier query code to nvc0 backend
> - restrict format modifiers to 2D textures
> - implement ->query_dmabuf_modifiers()
>
> Changes in v4:
> - add UAPI include path on meson builds
>
> Changes in v5:
> - remove unnecessary includes
>
> Acked-by: Emil Velikov 
> Tested-by: Andre Heider 
> Signed-off-by: Thierry Reding 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v5 3/6] nouveau/nvc0: Extract common tile mode macro

Reviewed-by: Ilia Mirkin 

On Thu, Mar 8, 2018 at 8:05 AM, Thierry Reding  wrote:
> From: Thierry Reding 
>
> Add a new macro that can be used to extract the tiling mode from a
> tile_mode value. This will be used to determine the number of GOBs
> used in block linear mode.
>
> Acked-by: Emil Velikov 
> Tested-by: Andre Heider 
> Signed-off-by: Thierry Reding 
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_resource.h | 15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
> index 0d5f026d6e1c..c68a50948360 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
> @@ -6,14 +6,17 @@
>
>  #define NVC0_RESOURCE_FLAG_VIDEO (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 0)
>
> +#define NVC0_TILE_MODE_X(m) (((m) >> 0) & 0xf)
> +#define NVC0_TILE_MODE_Y(m) (((m) >> 4) & 0xf)
> +#define NVC0_TILE_MODE_Z(m) (((m) >> 8) & 0xf)
>
> -#define NVC0_TILE_SHIFT_X(m) m) >> 0) & 0xf) + 6)
> -#define NVC0_TILE_SHIFT_Y(m) m) >> 4) & 0xf) + 3)
> -#define NVC0_TILE_SHIFT_Z(m) m) >> 8) & 0xf) + 0)
> +#define NVC0_TILE_SHIFT_X(m) (NVC0_TILE_MODE_X(m) + 6)
> +#define NVC0_TILE_SHIFT_Y(m) (NVC0_TILE_MODE_Y(m) + 3)
> +#define NVC0_TILE_SHIFT_Z(m) (NVC0_TILE_MODE_Z(m) + 0)
>
> -#define NVC0_TILE_SIZE_X(m) (64 << (((m) >> 0) & 0xf))
> -#define NVC0_TILE_SIZE_Y(m) ( 8 << (((m) >> 4) & 0xf))
> -#define NVC0_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf))
> +#define NVC0_TILE_SIZE_X(m) (64 << NVC0_TILE_MODE_X(m))
> +#define NVC0_TILE_SIZE_Y(m) ( 8 << NVC0_TILE_MODE_Y(m))
> +#define NVC0_TILE_SIZE_Z(m) ( 1 << NVC0_TILE_MODE_Z(m))
>
>  /* it's ok to mask only in the end because max value is 3 * 5 */
>
> --
> 2.16.2
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gallium: silence __builtin_frame_address nonzero argument is unsafe warning

2018-03-08 Thread Timothy Arceri

Calling __builtin_frame_address with a nonzero argument is unsafe
but is sometimes done for debugging purposes. Since this code is
part of some debug util code I'm assuming that is the case here
and using GCC pragma to silence the warning.

Cc: José Fonseca 
---
 src/gallium/auxiliary/util/u_debug_stack.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_debug_stack.c 
b/src/gallium/auxiliary/util/u_debug_stack.c
index 6ddacdb362..974e639e89 100644
--- a/src/gallium/auxiliary/util/u_debug_stack.c
+++ b/src/gallium/auxiliary/util/u_debug_stack.c
@@ -265,7 +265,10 @@ debug_backtrace_capture(struct debug_stack_frame 
*backtrace,
 #endif
 
 #if defined(PIPE_CC_GCC)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wframe-address"
frame_pointer = ((const void **)__builtin_frame_address(1));
+#pragma GCC diagnostic pop
 #elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
__asm {
   mov frame_pointer, ebp
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] radv: Generate icd files.

If the api version is too low, the loader clamps the application
requested version to the advertized version, which messes with
which extensions are enabled.
---
 src/amd/vulkan/dev_icd.json.in|  7 --
 src/amd/vulkan/meson.build| 34 +++-
 src/amd/vulkan/radeon_icd.json.in |  7 --
 src/amd/vulkan/radv_icd.py| 47 +++
 4 files changed, 70 insertions(+), 25 deletions(-)
 delete mode 100644 src/amd/vulkan/dev_icd.json.in
 delete mode 100644 src/amd/vulkan/radeon_icd.json.in
 create mode 100644 src/amd/vulkan/radv_icd.py

diff --git a/src/amd/vulkan/dev_icd.json.in b/src/amd/vulkan/dev_icd.json.in
deleted file mode 100644
index dfd032cdfb..00
--- a/src/amd/vulkan/dev_icd.json.in
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-"file_format_version": "1.0.0",
-"ICD": {
-"library_path": "@libvulkan_radeon_path@",
-"api_version": "1.1.0"
-}
-}
diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 98051560a5..61aa8c4fde 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -134,18 +134,30 @@ libvulkan_radeon = shared_library(
   install : true,
 )
 
-radv_data = configuration_data()
-radv_data.set('install_libdir', join_paths(get_option('prefix'), 
get_option('libdir')))
-radv_data.set('libvulkan_radeon_path', libvulkan_radeon.full_path())
-
-configure_file(
-  configuration : radv_data,
-  input : 'radeon_icd.json.in',
+radeon_icd = custom_target(
+  'radeon_icd',
+  input : 'radv_icd.py',
   output : 'radeon_icd.@0@.json'.format(host_machine.cpu()),
+  command : [
+prog_python2, '@INPUT@',
+'--lib-path', join_paths(get_option('prefix'), get_option('libdir')),
+'--out', '@OUTPUT@',
+  ],
+  depend_files : files('radv_extensions.py'),
+  build_by_default : true,
   install_dir : with_vulkan_icd_dir,
+  install : true,
 )
-configure_file(
-  configuration : radv_data,
-  input : 'dev_icd.json.in',
-  output : 'dev_icd.json'
+
+radv_dev_icd = custom_target(
+  'radv_dev_icd',
+  input : 'radv_icd.py',
+  output : 'dev_icd.json',
+  command : [
+prog_python2, '@INPUT@', '--lib-path', meson.current_build_dir(),
+'--out', '@OUTPUT@'
+  ],
+  depend_files : files('radv_extensions.py'),
+  build_by_default : true,
+  install : false,
 )
diff --git a/src/amd/vulkan/radeon_icd.json.in 
b/src/amd/vulkan/radeon_icd.json.in
deleted file mode 100644
index a99cb80ee9..00
--- a/src/amd/vulkan/radeon_icd.json.in
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-"file_format_version": "1.0.0",
-"ICD": {
-"library_path": "@install_libdir@/libvulkan_radeon.so",
-"api_version": "1.1.0"
-}
-}
diff --git a/src/amd/vulkan/radv_icd.py b/src/amd/vulkan/radv_icd.py
new file mode 100644
index 00..78ed379bd2
--- /dev/null
+++ b/src/amd/vulkan/radv_icd.py
@@ -0,0 +1,47 @@
+# Copyright 2017 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import json
+import os.path
+
+from radv_extensions import *
+
+if __name__ == '__main__':
+parser = argparse.ArgumentParser()
+parser.add_argument('--out', help='Output json file.', required=True)
+parser.add_argument('--lib-path', help='Path to libvulkan_radeon.so')
+args = parser.parse_args()
+
+path = 'libvulkan_radeon.so'
+if args.lib_path:
+path = os.path.join(args.lib_path, path)
+
+json_data = {
+'file_format_version': '1.0.0',
+'ICD': {
+'library_path': path,
+'api_version': str(MAX_API_VERSION),
+},
+}
+
+with open(args.out, 'w') as f:
+json.dump(json_data, f, indent = 4, sort_keys=True)
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] radv: Update version to 1.1.70.

Turns out they did not reset the patch number on release.
---
 src/amd/vulkan/radv_extensions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 469b09a160..bfee1f76fa 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -31,7 +31,7 @@ import xml.etree.cElementTree as et
 
 from mako.template import Template
 
-MAX_API_VERSION = '1.1.0'
+MAX_API_VERSION = '1.1.70'
 
 class Extension:
 def __init__(self, name, ext_version, enable):
@@ -274,7 +274,7 @@ uint32_t
 radv_physical_device_api_version(struct radv_physical_device *dev)
 {
 if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit)
-return VK_MAKE_VERSION(1, 1, 0);
+return VK_MAKE_VERSION(1, 1, 70);
 return VK_MAKE_VERSION(1, 0, 68);
 }
 """)
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] ac/nir: Add workaround for GFX9 buffer views.

On GFX9 whether the buffer size is interpreted as elements or bytes
depends on whether IDXEN is enabled in the instruction. If the index
is a constant zero, LLVM optimizes IDXEN to 0.

Now the size in elements is interpreted in bytes which of course
results in out of bounds accesses.

The correct fix is most likely to disable the LLVM optimization,
but we need something to work with LLVM <= 6.0.

radeonsi does the max between stride and element count on the CPU
but that results in the size intrinsics returning the wrong size
for the buffer. This would cause CTS errors for radv.

v2: Also include the store changes.

Fixes: e38685cc62e 'Revert "radv: disable support for VEGA for now."'
---
 src/amd/common/ac_llvm_build.c  | 24 
 src/amd/common/ac_llvm_build.h  | 10 ++
 src/amd/common/ac_nir_to_llvm.c | 39 ---
 src/amd/common/ac_shader_abi.h  |  4 
 4 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 9851cafb7f..0fe39a97cb 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1082,6 +1082,30 @@ LLVMValueRef ac_build_buffer_load_format(struct 
ac_llvm_context *ctx,
   can_speculate, true);
 }
 
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+  LLVMValueRef rsrc,
+  LLVMValueRef vindex,
+  LLVMValueRef voffset,
+  unsigned num_channels,
+  bool glc,
+  bool can_speculate)
+{
+   LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, 
LLVMConstInt(ctx->i32, 2, 0), "");
+   LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, 
LLVMConstInt(ctx->i32, 1, 0), "");
+   stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 
0), "");
+
+   LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
+ 
LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
+ elem_count, stride, "");
+
+   LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, 
new_elem_count,
+  LLVMConstInt(ctx->i32, 
2, 0), "");
+
+   return ac_build_buffer_load_common(ctx, new_rsrc, vindex, voffset,
+  num_channels, glc, false,
+  can_speculate, true);
+}
+
 /**
  * Set range metadata on an instruction.  This can only be used on load and
  * call instructions.  If you know an instruction can only produce the values
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index c080381d21..e469668f08 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -242,6 +242,16 @@ LLVMValueRef ac_build_buffer_load_format(struct 
ac_llvm_context *ctx,
 bool glc,
 bool can_speculate);
 
+/* load_format that handles the stride & element count better if idxen is
+ * disabled by LLVM. */
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+  LLVMValueRef rsrc,
+  LLVMValueRef vindex,
+  LLVMValueRef voffset,
+  unsigned num_channels,
+  bool glc,
+  bool can_speculate);
+
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
 
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9b85069860..8ec5002c47 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2263,12 +2263,21 @@ static LLVMValueRef build_tex_intrinsic(struct 
ac_nir_context *ctx,
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(>dest.ssa);
 
-   return ac_build_buffer_load_format(>ac,
-  args->resource,
-  args->addr,
-  ctx->ac.i32_0,
-  util_last_bit(mask),
-  false, true);
+   if (ctx->abi->gfx9_stride_size_workaround) {
+   return ac_build_buffer_load_format_gfx9_safe(>ac,
+

Re: [Mesa-dev] [PATCH 03/11] i965: perf: store sysfs device entry into context

On Thu, Mar 8, 2018 at 2:39 PM, Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> On 08/03/18 22:36, Jason Ekstrand wrote:
>
>
>> hash_table_foreach(brw->perfquery.oa_metrics_table, entry) {
>>struct brw_perf_query_info *query = entry->data;
>> -  char config_path[256];
>> +  char config_path[280];
>>
>
> What's with the bump?
>
> Because the compiler is complaining.
> It's able to see the length of the arguments (brw->perfquery.sysfs_dev_dir
> is already 256 + the other arguments...)
>
> It's like compilers are getting really good or something ;)
>

Wow, that's crazy.  First 6 are R-b me.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] nir: add nir_opt_move_load_ubo() optimization pass

On 03/08/2018 06:50 AM, Samuel Pitoiset wrote:
> This pass moves load UBO operations just before their first use,
> loosely based on nir_opt_move_comparisons.

If I'm reading this correctly, it moves UBO loads closer to the first
use in the same block.  My assumption is the benefit in the next patch
occurs because live ranges are smaller.  It seems like this could also
hurt performance since it may be harder for the scheduler to hide the
latency of the load when register pressure is not an issue.  Have you
measured performance of running apps to see if this is an issue?

I'm mostly asking because Jason had a series for global code motion that
does, in some cases, the opposite of this patch by moving UBO loads up
to earlier blocks.

> Signed-off-by: Samuel Pitoiset 
> ---
>  src/compiler/Makefile.sources|   1 +
>  src/compiler/nir/meson.build |   1 +
>  src/compiler/nir/nir.h   |   2 +
>  src/compiler/nir/nir_opt_move_load_ubo.c | 116 
> +++
>  4 files changed, 120 insertions(+)
>  create mode 100644 src/compiler/nir/nir_opt_move_load_ubo.c
> 
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 37340ba809..55143dbc66 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -266,6 +266,7 @@ NIR_FILES = \
>   nir/nir_opt_intrinsics.c \
>   nir/nir_opt_loop_unroll.c \
>   nir/nir_opt_move_comparisons.c \
> + nir/nir_opt_move_load_ubo.c \
>   nir/nir_opt_peephole_select.c \
>   nir/nir_opt_remove_phis.c \
>   nir/nir_opt_shrink_load.c \
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index a70c236b95..289bb9ea78 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -160,6 +160,7 @@ files_libnir = files(
>'nir_opt_intrinsics.c',
>'nir_opt_loop_unroll.c',
>'nir_opt_move_comparisons.c',
> +  'nir_opt_move_load_ubo.c',
>'nir_opt_peephole_select.c',
>'nir_opt_remove_phis.c',
>'nir_opt_shrink_load.c',
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 5b28c727c8..4224da5f82 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2786,6 +2786,8 @@ bool nir_opt_loop_unroll(nir_shader *shader, 
> nir_variable_mode indirect_mask);
>  
>  bool nir_opt_move_comparisons(nir_shader *shader);
>  
> +bool nir_opt_move_load_ubo(nir_shader *shader);
> +
>  bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
>  
>  bool nir_opt_remove_phis(nir_shader *shader);
> diff --git a/src/compiler/nir/nir_opt_move_load_ubo.c 
> b/src/compiler/nir/nir_opt_move_load_ubo.c
> new file mode 100644
> index 00..642651152b
> --- /dev/null
> +++ b/src/compiler/nir/nir_opt_move_load_ubo.c
> @@ -0,0 +1,116 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + * Copyright © 2018 Valve Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +
> +/**
> + * \file nir_opt_move_load_ubo.c
> + *
> + * This pass moves load UBO operations just before their first use.
> + */
> +static bool
> +move_load_ubo_source(nir_src *src, nir_block *block, nir_instr *before)
> +{
> +   if (!src->is_ssa)
> +  return false;
> +
> +   nir_instr *src_instr = src->ssa->parent_instr;
> +
> +   if (src_instr->block == block &&
> +   src_instr->type == nir_instr_type_intrinsic &&
> +   nir_instr_as_intrinsic(src_instr)->intrinsic == 
> nir_intrinsic_load_ubo) {
> +
> +  exec_node_remove(_instr->node);
> +
> +  if (before)
> + exec_node_insert_node_before(>node, _instr->node);
> +  else
> + exec_list_push_tail(>instr_list, _instr->node);
> +
> +  return true;
> +   }
> +   return false;
> +}
> +
> +static bool
> +move_load_ubo_source_cb(nir_src *src, void *data)
> +{
> +

Re: [Mesa-dev] [PATCH] ac/nir: set number of channels for packed mrt exports

Reviewed-by: Bas Nieuwenhuizen 

On Thu, Mar 8, 2018 at 5:30 PM, Samuel Pitoiset
 wrote:
> Bit 0 enables VSRC0 (R in low bits, G high) and bit 2 enables
> VSRC1 (B in low bits, A high).
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index c785244dcc..d15e79d257 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -5910,22 +5910,27 @@ si_llvm_init_export_args(struct radv_shader_context 
> *ctx,
> break;
>
> case V_028714_SPI_SHADER_FP16_ABGR:
> +   args->enabled_channels = 0x5;
> packf = ac_build_cvt_pkrtz_f16;
> break;
>
> case V_028714_SPI_SHADER_UNORM16_ABGR:
> +   args->enabled_channels = 0x5;
> packf = ac_build_cvt_pknorm_u16;
> break;
>
> case V_028714_SPI_SHADER_SNORM16_ABGR:
> +   args->enabled_channels = 0x5;
> packf = ac_build_cvt_pknorm_i16;
> break;
>
> case V_028714_SPI_SHADER_UINT16_ABGR:
> +   args->enabled_channels = 0x5;
> packi = ac_build_cvt_pk_u16;
> break;
>
> case V_028714_SPI_SHADER_SINT16_ABGR:
> +   args->enabled_channels = 0x5;
> packi = ac_build_cvt_pk_i16;
> break;
>
> --
> 2.16.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] ac/nir: Add workaround for GFX9 buffer views.

On GFX9 whether the buffer size is interpreted as elements or bytes
depends on whether IDXEN is enabled in the instruction. If the index
is a constant zero, LLVM optimizes IDXEN to 0.

Now the size in elements is interpreted in bytes which of course
results in out of bounds accesses.

The correct fix is most likely to disable the LLVM optimization,
but we need something to work with LLVM <= 6.0.

radeonsi does the max between stride and element count on the CPU
but that results in the size intrinsics returning the wrong size
for the buffer. This would cause CTS errors for radv.

Fixes: e38685cc62e 'Revert "radv: disable support for VEGA for now."'
---
 src/amd/common/ac_llvm_build.c  | 24 
 src/amd/common/ac_llvm_build.h  | 10 ++
 src/amd/common/ac_nir_to_llvm.c | 22 --
 src/amd/common/ac_shader_abi.h  |  4 
 4 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 9851cafb7f..0fe39a97cb 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1082,6 +1082,30 @@ LLVMValueRef ac_build_buffer_load_format(struct 
ac_llvm_context *ctx,
   can_speculate, true);
 }
 
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+  LLVMValueRef rsrc,
+  LLVMValueRef vindex,
+  LLVMValueRef voffset,
+  unsigned num_channels,
+  bool glc,
+  bool can_speculate)
+{
+   LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, 
LLVMConstInt(ctx->i32, 2, 0), "");
+   LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, 
LLVMConstInt(ctx->i32, 1, 0), "");
+   stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 
0), "");
+
+   LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
+ 
LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
+ elem_count, stride, "");
+
+   LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, 
new_elem_count,
+  LLVMConstInt(ctx->i32, 
2, 0), "");
+
+   return ac_build_buffer_load_common(ctx, new_rsrc, vindex, voffset,
+  num_channels, glc, false,
+  can_speculate, true);
+}
+
 /**
  * Set range metadata on an instruction.  This can only be used on load and
  * call instructions.  If you know an instruction can only produce the values
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index c080381d21..e469668f08 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -242,6 +242,16 @@ LLVMValueRef ac_build_buffer_load_format(struct 
ac_llvm_context *ctx,
 bool glc,
 bool can_speculate);
 
+/* load_format that handles the stride & element count better if idxen is
+ * disabled by LLVM. */
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+  LLVMValueRef rsrc,
+  LLVMValueRef vindex,
+  LLVMValueRef voffset,
+  unsigned num_channels,
+  bool glc,
+  bool can_speculate);
+
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
 
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9b85069860..fe239d160d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2263,12 +2263,21 @@ static LLVMValueRef build_tex_intrinsic(struct 
ac_nir_context *ctx,
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(>dest.ssa);
 
-   return ac_build_buffer_load_format(>ac,
-  args->resource,
-  args->addr,
-  ctx->ac.i32_0,
-  util_last_bit(mask),
-  false, true);
+   if (ctx->abi->gfx9_stride_size_workaround) {
+   return ac_build_buffer_load_format_gfx9_safe(>ac,
+

Re: [Mesa-dev] [PATCH 2/2] radv: run nir_opt_move_load_ubo

2018-03-08 Thread Timothy Arceri

I gave this a run on radeonsi and the results were mixed. On the one 
hand we reduced some spills; on the other hand we increased VGPR use and 
max waves dropped. I wonder if we should look more closely into what's 
going on here.


72280 shaders in 43328 tests
Totals:
SGPRS: 2930328 -> 2922464 (-0.27 %)
VGPRS: 1736292 -> 1738480 (0.13 %)
Spilled SGPRs: 9430 -> 8917 (-5.44 %)
Spilled VGPRs: 57 -> 53 (-7.02 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 48 -> 40 (-16.67 %) dwords per thread
Code Size: 63877120 -> 63762236 (-0.18 %) bytes
LDS: 438 -> 438 (0.00 %) blocks
Max Waves: 556299 -> 555981 (-0.06 %)
Wait states: 0 -> 0 (0.00 %)

On 09/03/18 01:50, Samuel Pitoiset wrote:

Polaris10:
Totals from affected shaders:
SGPRS: 106656 -> 105952 (-0.66 %)
VGPRS: 73464 -> 73400 (-0.09 %)
Spilled SGPRs: 7121 -> 6861 (-3.65 %)
Code Size: 4157792 -> 4158716 (0.02 %) bytes
Max Waves: 9316 -> 9330 (0.15 %)

Vega10:
Totals from affected shaders:
SGPRS: 106720 -> 106032 (-0.64 %)
VGPRS: 67828 -> 67752 (-0.11 %)
Spilled SGPRs: 7113 -> 6853 (-3.66 %)
Code Size: 3704432 -> 3705112 (0.02 %) bytes
Max Waves: 10547 -> 10571 (0.23 %)

Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/radv_shader.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 85672e600d..6a1db81660 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -152,6 +152,7 @@ radv_optimize_nir(struct nir_shader *shader)
  } while (progress);
  
  NIR_PASS(progress, shader, nir_opt_shrink_load);

+NIR_PASS(progress, shader, nir_opt_move_load_ubo);
  }
  
  nir_shader *



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 03/11] i965: perf: store sysfs device entry into context

2018-03-08 Thread Lionel Landwerlin


On 08/03/18 22:36, Jason Ekstrand wrote:



    hash_table_foreach(brw->perfquery.oa_metrics_table, entry) {
       struct brw_perf_query_info *query = entry->data;
-      char config_path[256];
+      char config_path[280];


What's with the bump?

Because the compiler is complaining.
It's able to see the length of the arguments 
(brw->perfquery.sysfs_dev_dir is already 256 + the other arguments...)


It's like compilers are getting really good or something ;)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 03/11] i965: perf: store sysfs device entry into context

On Thu, Mar 8, 2018 at 7:42 AM, Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> We want to reuse it later on.
>
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |   3 +
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 146
> +++---
>  2 files changed, 73 insertions(+), 76 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
> b/src/mesa/drivers/dri/i965/brw_context.h
> index d6e3c7807f7..d3e7c71207b 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1189,6 +1189,9 @@ struct brw_context
> */
>struct hash_table *oa_metrics_table;
>
> +  /* Location of the device's sysfs entry. */
> +  char sysfs_dev_dir[256];
> +
>struct brw_perf_query_info *queries;
>int n_queries;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c
> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 622c2d2d950..a084b30fe7c 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -318,6 +318,47 @@ brw_perf_query(struct gl_perf_query_object *o)
>
>  /***
> ***/
>
> +static bool
> +read_file_uint64(const char *file, uint64_t *val)
> +{
> +char buf[32];
> +int fd, n;
> +
> +fd = open(file, 0);
> +if (fd < 0)
> +   return false;
> +while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 &&
> +   errno == EINTR);
> +close(fd);
> +if (n < 0)
> +   return false;
> +
> +buf[n] = '\0';
> +*val = strtoull(buf, NULL, 0);
> +
> +return true;
> +}
> +
> +static bool
> +read_sysfs_drm_device_file_uint64(struct brw_context *brw,
> +  const char *file,
> +  uint64_t *value)
> +{
> +   char buf[512];
> +   int len;
> +
> +   len = snprintf(buf, sizeof(buf), "%s/%s",
> +  brw->perfquery.sysfs_dev_dir, file);
> +   if (len < 0 || len >= sizeof(buf)) {
> +  DBG("Failed to concatenate sys filename to read u64 from\n");
> +  return false;
> +   }
> +
> +   return read_file_uint64(buf, value);
> +}
> +
> +/**
> /
> +
>  static bool
>  brw_is_perf_query_ready(struct gl_context *ctx,
>  struct gl_perf_query_object *o);
> @@ -1746,27 +1787,6 @@ init_pipeline_statistic_query_registers(struct
> brw_context *brw)
> query->data_size = sizeof(uint64_t) * query->n_counters;
>  }
>
> -static bool
> -read_file_uint64(const char *file, uint64_t *val)
> -{
> -char buf[32];
> -int fd, n;
> -
> -fd = open(file, 0);
> -if (fd < 0)
> -   return false;
> -while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 &&
> -   errno == EINTR);
> -close(fd);
> -if (n < 0)
> -   return false;
> -
> -buf[n] = '\0';
> -*val = strtoull(buf, NULL, 0);
> -
> -return true;
> -}
> -
>  static void
>  register_oa_config(struct brw_context *brw,
> const struct brw_perf_query_info *query,
> @@ -1780,14 +1800,14 @@ register_oa_config(struct brw_context *brw,
>  }
>
>  static void
> -enumerate_sysfs_metrics(struct brw_context *brw, const char
> *sysfs_dev_dir)
> +enumerate_sysfs_metrics(struct brw_context *brw)
>  {
> char buf[256];
> DIR *metricsdir = NULL;
> struct dirent *metric_entry;
> int len;
>
> -   len = snprintf(buf, sizeof(buf), "%s/metrics", sysfs_dev_dir);
> +   len = snprintf(buf, sizeof(buf), "%s/metrics",
> brw->perfquery.sysfs_dev_dir);
> if (len < 0 || len >= sizeof(buf)) {
>DBG("Failed to concatenate path to sysfs metrics/ directory\n");
>return;
> @@ -1814,7 +1834,7 @@ enumerate_sysfs_metrics(struct brw_context *brw,
> const char *sysfs_dev_dir)
>   uint64_t id;
>
>   len = snprintf(buf, sizeof(buf), "%s/metrics/%s/id",
> -sysfs_dev_dir, metric_entry->d_name);
> +brw->perfquery.sysfs_dev_dir,
> metric_entry->d_name);
>   if (len < 0 || len >= sizeof(buf)) {
>  DBG("Failed to concatenate path to sysfs metric id file\n");
>  continue;
> @@ -1834,37 +1854,18 @@ enumerate_sysfs_metrics(struct brw_context *brw,
> const char *sysfs_dev_dir)
>  }
>
>  static bool
> -read_sysfs_drm_device_file_uint64(struct brw_context *brw,
> -  const char *sysfs_dev_dir,
> -  const char *file,
> -  uint64_t *value)
> -{
> -   char buf[512];
> -   int len;
> -
> -   len = snprintf(buf, sizeof(buf), "%s/%s", sysfs_dev_dir, file);
> -   if (len < 0 || len >= sizeof(buf)) {
> -  DBG("Failed to concatenate sys filename to read u64 from\n");
> -  return false;
> -   }
> -
> -   return

Re: [Mesa-dev] [PATCH 2/4] spirv: add support for SPV_AMD_shader_trinary_minmax

On 03/08/2018 02:23 PM, Ilia Mirkin wrote:
> On Thu, Mar 8, 2018 at 5:22 PM, Ilia Mirkin  wrote:
>> On Thu, Mar 8, 2018 at 5:14 PM, Daniel Schürmann
>>  wrote:
>>> From: Dave Airlie 
>>>
>>> Signed-off-by: Dave Airlie 
>>> ---
>>>  src/compiler/shader_info.h|  1 +
>>>  src/compiler/spirv/spirv_to_nir.c |  3 +++
>>>  src/compiler/spirv/vtn_amd.c  | 52 
>>> +++
>>>  src/compiler/spirv/vtn_private.h  |  2 ++
>>>  4 files changed, 58 insertions(+)
>>>
>>> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
>>> index b1e200070f..01a3060352 100644
>>> --- a/src/compiler/shader_info.h
>>> +++ b/src/compiler/shader_info.h
>>> @@ -51,6 +51,7 @@ struct spirv_supported_capabilities {
>>> bool subgroup_quad;
>>> bool subgroup_shuffle;
>>> bool subgroup_vote;
>>> +   bool AMD_shader_trinary_minmax;
> 
> Oh, and all the others are without the AMD/EXT/whatever prefix... not
> sure what people want the convention to be. But so far it should just
> be "shader_trinary_minmax".

I would think that trinary_minmax should be sufficient.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/4] nir: add support for min/max/median of 3 srcs

This patch is

Reviewed-by: Ian Romanick 

I think we should also add a flag for lowering this instruction as is
done in GLSL IR.  You should then add some patterns to
nir_opt_algebraic.py that recognize open-coded versions.  It would also
be interesting to detect clamp() patterns and convert them to med3.

I don't have any expectation that you would implement the actual lowering.

On 03/08/2018 02:14 PM, Daniel Schürmann wrote:
> From: Dave Airlie 
> 
> These are needed for SPV_AMD_shader_trinary_minmax,
> the AMD HW supports these.
> 
> Co-authored-by: Daniel Schürmann 
> Signed-off-by: Dave Airlie 
> ---
>  src/compiler/nir/nir_opcodes.py | 14 ++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
> index 65d1320062..fdf90ef3f5 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -658,6 +658,20 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
>  
>  
>  triop("fcsel", tfloat32, "(src0 != 0.0f) ? src1 : src2")
> +
> +# 3 way min/max/med
> +triop("fmin3", tfloat, "fminf(src0, fminf(src1, src2))")
> +triop("imin3", tint, "MIN2(src0, MIN2(src1, src2))")
> +triop("umin3", tuint, "MIN2(src0, MIN2(src1, src2))")
> +
> +triop("fmax3", tfloat, "fmaxf(src0, fmaxf(src1, src2))")
> +triop("imax3", tint, "MAX2(src0, MAX2(src1, src2))")
> +triop("umax3", tuint, "MAX2(src0, MAX2(src1, src2))")
> +
> +triop("fmed3", tfloat, "fmaxf(fminf(fmaxf(src0, src1), src2), fminf(src0, 
> src1))")
> +triop("imed3", tint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
> +triop("umed3", tuint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
> +
>  opcode("bcsel", 0, tuint, [0, 0, 0],
>[tbool, tuint, tuint], "", "src0 ? src1 : src2")
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/4] spirv: add support for SPV_AMD_shader_trinary_minmax

On Thu, Mar 8, 2018 at 5:14 PM, Daniel Schürmann
 wrote:
> From: Dave Airlie 
>
> Signed-off-by: Dave Airlie 
> ---
>  src/compiler/shader_info.h|  1 +
>  src/compiler/spirv/spirv_to_nir.c |  3 +++
>  src/compiler/spirv/vtn_amd.c  | 52 
> +++
>  src/compiler/spirv/vtn_private.h  |  2 ++
>  4 files changed, 58 insertions(+)
>
> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
> index b1e200070f..01a3060352 100644
> --- a/src/compiler/shader_info.h
> +++ b/src/compiler/shader_info.h
> @@ -51,6 +51,7 @@ struct spirv_supported_capabilities {
> bool subgroup_quad;
> bool subgroup_shuffle;
> bool subgroup_vote;
> +   bool AMD_shader_trinary_minmax;
>  };
>
>  /* The supported extensions which add extended instructions */
> diff --git a/src/compiler/spirv/spirv_to_nir.c 
> b/src/compiler/spirv/spirv_to_nir.c
> index 6a358c5973..ddd1b8fe79 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -376,6 +376,9 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
>} else if ((strcmp((const char *)[2], "SPV_AMD_gcn_shader") == 0)
>  && (b->options && b->options->exts.AMD_gcn_shader)) {
>   val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
> +  } else if ((strcmp((const char *)[2], 
> "SPV_AMD_shader_trinary_minmax") == 0)
> +&& (b->options && 
> b->options->caps.AMD_shader_trinary_minmax)) {
> + val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
>} else {
>   vtn_fail("Unsupported extension");
>}
> diff --git a/src/compiler/spirv/vtn_amd.c b/src/compiler/spirv/vtn_amd.c
> index b2b3e055f0..313e015f41 100644
> --- a/src/compiler/spirv/vtn_amd.c
> +++ b/src/compiler/spirv/vtn_amd.c
> @@ -55,3 +55,55 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder 
> *b, uint32_t ext_opcode
> }
> return true;
>  }
> +
> +bool
> +vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, 
> uint32_t ext_opcode,
> + const uint32_t *w, unsigned 
> count)
> +{
> +   struct nir_builder *nb = >nb;
> +   const struct glsl_type *dest_type =
> +  vtn_value(b, w[1], vtn_value_type_type)->type->type;
> +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
> +   val->ssa = vtn_create_ssa_value(b, dest_type);
> +
> +   unsigned num_inputs = count - 5;
> +   assert(num_inputs == 3);
> +   nir_ssa_def *src[3] = { NULL, };
> +  for (unsigned i = 0; i < num_inputs; i++)

indent

> +  src[i] = vtn_ssa_value(b, w[i + 5])->def;

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/4] spirv: add support for SPV_AMD_shader_trinary_minmax

On Thu, Mar 8, 2018 at 5:22 PM, Ilia Mirkin  wrote:
> On Thu, Mar 8, 2018 at 5:14 PM, Daniel Schürmann
>  wrote:
>> From: Dave Airlie 
>>
>> Signed-off-by: Dave Airlie 
>> ---
>>  src/compiler/shader_info.h|  1 +
>>  src/compiler/spirv/spirv_to_nir.c |  3 +++
>>  src/compiler/spirv/vtn_amd.c  | 52 
>> +++
>>  src/compiler/spirv/vtn_private.h  |  2 ++
>>  4 files changed, 58 insertions(+)
>>
>> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
>> index b1e200070f..01a3060352 100644
>> --- a/src/compiler/shader_info.h
>> +++ b/src/compiler/shader_info.h
>> @@ -51,6 +51,7 @@ struct spirv_supported_capabilities {
>> bool subgroup_quad;
>> bool subgroup_shuffle;
>> bool subgroup_vote;
>> +   bool AMD_shader_trinary_minmax;

Oh, and all the others are without the AMD/EXT/whatever prefix... not
sure what people want the convention to be. But so far it should just
be "shader_trinary_minmax".

>>  };
>>
>>  /* The supported extensions which add extended instructions */
>> diff --git a/src/compiler/spirv/spirv_to_nir.c 
>> b/src/compiler/spirv/spirv_to_nir.c
>> index 6a358c5973..ddd1b8fe79 100644
>> --- a/src/compiler/spirv/spirv_to_nir.c
>> +++ b/src/compiler/spirv/spirv_to_nir.c
>> @@ -376,6 +376,9 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
>>} else if ((strcmp((const char *)[2], "SPV_AMD_gcn_shader") == 0)
>>  && (b->options && b->options->exts.AMD_gcn_shader)) {
>>   val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
>> +  } else if ((strcmp((const char *)[2], 
>> "SPV_AMD_shader_trinary_minmax") == 0)
>> +&& (b->options && 
>> b->options->caps.AMD_shader_trinary_minmax)) {
>> + val->ext_handler = 
>> vtn_handle_amd_shader_trinary_minmax_instruction;
>>} else {
>>   vtn_fail("Unsupported extension");
>>}
>> diff --git a/src/compiler/spirv/vtn_amd.c b/src/compiler/spirv/vtn_amd.c
>> index b2b3e055f0..313e015f41 100644
>> --- a/src/compiler/spirv/vtn_amd.c
>> +++ b/src/compiler/spirv/vtn_amd.c
>> @@ -55,3 +55,55 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder 
>> *b, uint32_t ext_opcode
>> }
>> return true;
>>  }
>> +
>> +bool
>> +vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, 
>> uint32_t ext_opcode,
>> + const uint32_t *w, 
>> unsigned count)
>> +{
>> +   struct nir_builder *nb = >nb;
>> +   const struct glsl_type *dest_type =
>> +  vtn_value(b, w[1], vtn_value_type_type)->type->type;
>> +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
>> +   val->ssa = vtn_create_ssa_value(b, dest_type);
>> +
>> +   unsigned num_inputs = count - 5;
>> +   assert(num_inputs == 3);
>> +   nir_ssa_def *src[3] = { NULL, };
>> +  for (unsigned i = 0; i < num_inputs; i++)
>
> indent
>
>> +  src[i] = vtn_ssa_value(b, w[i + 5])->def;
>
>   -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/11] i965: perf: reuse timescale base function from query

2018-03-08 Thread Lionel Landwerlin


On 08/03/18 21:17, Kenneth Graunke wrote:

On Thursday, March 8, 2018 7:42:49 AM PST Lionel Landwerlin wrote:

We already have the same function in brw_queryobj.c

Signed-off-by: Lionel Landwerlin 
---
  src/mesa/drivers/dri/i965/brw_performance_query.c | 13 ++---
  1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index a084b30fe7c..d0faf4a2cb2 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -613,15 +613,6 @@ drop_from_unaccumulated_query_list(struct brw_context *brw,
 reap_old_sample_buffers(brw);
  }
  
-static uint64_t

-timebase_scale(struct brw_context *brw, uint32_t u32_time_delta)
-{
-   const struct gen_device_info *devinfo = >screen->devinfo;
-   uint64_t tmp = ((uint64_t)u32_time_delta) * 10ull;
-
-   return tmp ? tmp / devinfo->timestamp_frequency : 0;
-}

This function isn't quite the same...the one in brw_queryobj.c doesn't
have the ternary.  But, the ternary is checking the /numerator/ for
zero, which seems kinda pointless... 0 / timestamp_frequency == 0...

Patches 1-6 are:
Reviewed-by: Kenneth Graunke 

Yeah I know. I don't know why it was there...
It's pretty much equivalent.

Thanks a lot!


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/4] ac: add support for trinary_minmax instructions

---
 src/amd/common/ac_nir_to_llvm.c | 53 +
 1 file changed, 53 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9b85069860..e78f4e8dcd 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2040,6 +2040,59 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
in[chan] = ac_llvm_extract_elem(>ac, src[0], chan);
result = ac_build_intrinsic(>ac,  "llvm.amdgcn.cubeid",
ctx->ac.f32, in, 3, 
AC_FUNC_ATTR_READNONE);
+   }
+
+   case nir_op_fmin3:
+   result = emit_intrin_2f_param(>ac, "llvm.minnum",
+   ac_to_float_type(>ac, 
def_type), src[0], src[1]);
+   result = emit_intrin_2f_param(>ac, "llvm.minnum",
+   ac_to_float_type(>ac, 
def_type), result, src[2]);
+   break;
+   case nir_op_umin3:
+   result = emit_minmax_int(>ac, LLVMIntULT, src[0], src[1]);
+   result = emit_minmax_int(>ac, LLVMIntULT, result, src[2]);
+   break;
+   case nir_op_imin3:
+   result = emit_minmax_int(>ac, LLVMIntSLT, src[0], src[1]);
+   result = emit_minmax_int(>ac, LLVMIntSLT, result, src[2]);
+   break;
+   case nir_op_fmax3:
+   result = emit_intrin_2f_param(>ac, "llvm.maxnum",
+   ac_to_float_type(>ac, 
def_type), src[0], src[1]);
+   result = emit_intrin_2f_param(>ac, "llvm.maxnum",
+   ac_to_float_type(>ac, 
def_type), result, src[2]);
+   break;
+   case nir_op_umax3:
+   result = emit_minmax_int(>ac, LLVMIntUGT, src[0], src[1]);
+   result = emit_minmax_int(>ac, LLVMIntUGT, result, src[2]);
+   break;
+   case nir_op_imax3:
+   result = emit_minmax_int(>ac, LLVMIntSGT, src[0], src[1]);
+   result = emit_minmax_int(>ac, LLVMIntSGT, result, src[2]);
+   break;
+   case nir_op_fmed3: {
+   LLVMValueRef tmp1 = emit_intrin_2f_param(>ac, 
"llvm.minnum",
+   ac_to_float_type(>ac, 
def_type), src[0], src[1]);
+   LLVMValueRef tmp2 = emit_intrin_2f_param(>ac, 
"llvm.maxnum",
+   ac_to_float_type(>ac, 
def_type), src[0], src[1]);
+   tmp2 = emit_intrin_2f_param(>ac, "llvm.minnum",
+   ac_to_float_type(>ac, 
def_type), tmp2, src[2]);
+   result = emit_intrin_2f_param(>ac, "llvm.maxnum",
+   ac_to_float_type(>ac, 
def_type), tmp1, tmp2);
+   break;
+   }
+   case nir_op_imed3: {
+   LLVMValueRef tmp1 = emit_minmax_int(>ac, LLVMIntSLT, 
src[0], src[1]);
+   LLVMValueRef tmp2 = emit_minmax_int(>ac, LLVMIntSGT, 
src[0], src[1]);
+   tmp2 = emit_minmax_int(>ac, LLVMIntSLT, tmp2, src[2]);
+   result = emit_minmax_int(>ac, LLVMIntSGT, tmp1, tmp2);
+   break;
+   }
+   case nir_op_umed3: {
+   LLVMValueRef tmp1 = emit_minmax_int(>ac, LLVMIntULT, 
src[0], src[1]);
+   LLVMValueRef tmp2 = emit_minmax_int(>ac, LLVMIntUGT, 
src[0], src[1]);
+   tmp2 = emit_minmax_int(>ac, LLVMIntULT, tmp2, src[2]);
+   result = emit_minmax_int(>ac, LLVMIntUGT, tmp1, tmp2);
break;
}
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] nir: add support for min/max/median of 3 srcs

From: Dave Airlie 

These are needed for SPV_AMD_shader_trinary_minmax,
the AMD HW supports these.

Co-authored-by: Daniel Schürmann 
Signed-off-by: Dave Airlie 
---
 src/compiler/nir/nir_opcodes.py | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 65d1320062..fdf90ef3f5 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -658,6 +658,20 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
 
 
 triop("fcsel", tfloat32, "(src0 != 0.0f) ? src1 : src2")
+
+# 3 way min/max/med
+triop("fmin3", tfloat, "fminf(src0, fminf(src1, src2))")
+triop("imin3", tint, "MIN2(src0, MIN2(src1, src2))")
+triop("umin3", tuint, "MIN2(src0, MIN2(src1, src2))")
+
+triop("fmax3", tfloat, "fmaxf(src0, fmaxf(src1, src2))")
+triop("imax3", tint, "MAX2(src0, MAX2(src1, src2))")
+triop("umax3", tuint, "MAX2(src0, MAX2(src1, src2))")
+
+triop("fmed3", tfloat, "fmaxf(fminf(fmaxf(src0, src1), src2), fminf(src0, 
src1))")
+triop("imed3", tint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
+triop("umed3", tuint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
+
 opcode("bcsel", 0, tuint, [0, 0, 0],
   [tbool, tuint, tuint], "", "src0 ? src1 : src2")
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] spirv: add support for SPV_AMD_shader_trinary_minmax

From: Dave Airlie 

Signed-off-by: Dave Airlie 
---
 src/compiler/shader_info.h|  1 +
 src/compiler/spirv/spirv_to_nir.c |  3 +++
 src/compiler/spirv/vtn_amd.c  | 52 +++
 src/compiler/spirv/vtn_private.h  |  2 ++
 4 files changed, 58 insertions(+)

diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index b1e200070f..01a3060352 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -51,6 +51,7 @@ struct spirv_supported_capabilities {
bool subgroup_quad;
bool subgroup_shuffle;
bool subgroup_vote;
+   bool AMD_shader_trinary_minmax;
 };
 
 /* The supported extensions which add extended instructions */
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 6a358c5973..ddd1b8fe79 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -376,6 +376,9 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
   } else if ((strcmp((const char *)[2], "SPV_AMD_gcn_shader") == 0)
 && (b->options && b->options->exts.AMD_gcn_shader)) {
  val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
+  } else if ((strcmp((const char *)[2], "SPV_AMD_shader_trinary_minmax") 
== 0)
+&& (b->options && b->options->caps.AMD_shader_trinary_minmax)) 
{
+ val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
   } else {
  vtn_fail("Unsupported extension");
   }
diff --git a/src/compiler/spirv/vtn_amd.c b/src/compiler/spirv/vtn_amd.c
index b2b3e055f0..313e015f41 100644
--- a/src/compiler/spirv/vtn_amd.c
+++ b/src/compiler/spirv/vtn_amd.c
@@ -55,3 +55,55 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, 
uint32_t ext_opcode
}
return true;
 }
+
+bool
+vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, 
uint32_t ext_opcode,
+ const uint32_t *w, unsigned 
count)
+{
+   struct nir_builder *nb = >nb;
+   const struct glsl_type *dest_type =
+  vtn_value(b, w[1], vtn_value_type_type)->type->type;
+   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+   val->ssa = vtn_create_ssa_value(b, dest_type);
+
+   unsigned num_inputs = count - 5;
+   assert(num_inputs == 3);
+   nir_ssa_def *src[3] = { NULL, };
+  for (unsigned i = 0; i < num_inputs; i++)
+  src[i] = vtn_ssa_value(b, w[i + 5])->def;
+
+   switch ((enum ShaderTrinaryMinMaxAMD)ext_opcode) {
+   case FMin3AMD:
+  val->ssa->def = nir_fmin3(nb, src[0], src[1], src[2]);
+  break;
+   case UMin3AMD:
+  val->ssa->def = nir_umin3(nb, src[0], src[1], src[2]);
+  break;
+   case SMin3AMD:
+  val->ssa->def = nir_imin3(nb, src[0], src[1], src[2]);
+  break;
+   case FMax3AMD:
+  val->ssa->def = nir_fmax3(nb, src[0], src[1], src[2]);
+  break;
+   case UMax3AMD:
+  val->ssa->def = nir_umax3(nb, src[0], src[1], src[2]);
+  break;
+   case SMax3AMD:
+  val->ssa->def = nir_imax3(nb, src[0], src[1], src[2]);
+  break;
+   case FMid3AMD:
+  val->ssa->def = nir_fmed3(nb, src[0], src[1], src[2]);
+  break;
+   case UMid3AMD:
+  val->ssa->def = nir_umed3(nb, src[0], src[1], src[2]);
+  break;
+   case SMid3AMD:
+  val->ssa->def = nir_imed3(nb, src[0], src[1], src[2]);
+  break;
+   default:
+ unreachable("unknown opcode\n");
+ break;
+   }
+
+   return true;
+}
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index a8fa612384..76bb7fa6b1 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -735,4 +735,6 @@ vtn_u64_literal(const uint32_t *w)
 bool vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, uint32_t 
ext_opcode,
const uint32_t *words, unsigned 
count);
 
+bool vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, 
uint32_t ext_opcode,
+ const uint32_t *words, 
unsigned count);
 #endif /* _VTN_PRIVATE_H_ */
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] radv: enable VK_AMD_shader_trinary_minmax extension

---
 src/amd/vulkan/radv_extensions.py | 1 +
 src/amd/vulkan/radv_shader.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 469b09a160..12efaa04c6 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -94,6 +94,7 @@ EXTENSIONS = [
 Extension('VK_AMD_gcn_shader',1, True),
 Extension('VK_AMD_rasterization_order',   1, 
'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
 Extension('VK_AMD_shader_info',   1, True),
+Extension('VK_AMD_shader_trinary_minmax', 1, True),
 ]
 
 class VkVersion:
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 85672e600d..9073b7d043 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -214,6 +214,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
.multiview = true,
.subgroup_basic = true,
.variable_pointers = true,
+   .AMD_shader_trinary_minmax = true,
},
.exts = {
.AMD_gcn_shader = true,
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: Don't write to user buffer in glGetTexParameterIuiv on error

2018-03-08 Thread Matt Turner

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa: Don't write to user buffer in glGetTexParameterIuiv on error

From: Ian Romanick 

With some sets of optimization flags, GCC will generate warnings like
this:

src/mesa/main/texparam.c:2327:27: warning: ‘*((void *)+12)’ may be used 
uninitialized in this function [-Wmaybe-uninitialized]
 params[3] = ip[3];
 ~~^~~
src/mesa/main/texparam.c:2320:16: note: ‘*((void *)+12)’ was declared here
  GLint ip[4];
^~

ip is not initialized in cases where a GL error is generated.  In these
cases, we should *not* write to the user's buffer, so this is actually a
bug.  I wrote a new piglit test gl-3.0-texparameteri to show this bug.

I suspect that Coverity also detected this, but the scan site is
currently down.

Fixes: c2c507786 "main: Added entry points for glGetTextureParameteriv, Iiv, 
and Iuiv."
Signed-off-by: Ian Romanick 
---
 src/mesa/main/texparam.c | 28 ++--
 1 file changed, 2 insertions(+), 26 deletions(-)

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 7cc9d9e..301407e 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -2306,30 +2306,6 @@ get_tex_parameterIiv(struct gl_context *ctx,
}
 }
 
-static void
-get_tex_parameterIuiv(struct gl_context *ctx,
-  struct gl_texture_object *obj,
-  GLenum pname, GLuint *params, bool dsa)
-{
-   switch (pname) {
-   case GL_TEXTURE_BORDER_COLOR:
-  COPY_4V(params, obj->Sampler.BorderColor.i);
-  break;
-   default:
-  {
- GLint ip[4];
- get_tex_parameteriv(ctx, obj, pname, ip, dsa);
- params[0] = ip[0];
- if (pname == GL_TEXTURE_SWIZZLE_RGBA_EXT ||
- pname == GL_TEXTURE_CROP_RECT_OES) {
-params[1] = ip[1];
-params[2] = ip[2];
-params[3] = ip[3];
- }
-  }
-   }
-}
-
 void GLAPIENTRY
 _mesa_GetTexParameterfv(GLenum target, GLenum pname, GLfloat *params)
 {
@@ -2382,7 +2358,7 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, 
GLuint *params)
if (!texObj)
   return;
 
-   get_tex_parameterIuiv(ctx, texObj, pname, params, false);
+   get_tex_parameterIiv(ctx, texObj, pname, (GLint *) params, false);
 }
 
 
@@ -2436,5 +2412,5 @@ _mesa_GetTextureParameterIuiv(GLuint texture, GLenum 
pname, GLuint *params)
if (!texObj)
   return;
 
-   get_tex_parameterIuiv(ctx, texObj, pname, params, true);
+   get_tex_parameterIiv(ctx, texObj, pname, (GLint *) params, true);
 }
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 104229] radeon_icd.i686.json api_version is 1.0.3

2018-03-08 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=104229

--- Comment #2 from mercuriete  ---
This bug is no longer valid due to this commit:

https://cgit.freedesktop.org/mesa/mesa/commit/?id=5b3979704df51f05a6f226ba3a10046df466d03d

Closing as resolved.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 104229] radeon_icd.i686.json api_version is 1.0.3

2018-03-08 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=104229

mercuriete  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/11] i965: perf: reuse timescale base function from query

2018-03-08 Thread Kenneth Graunke

On Thursday, March 8, 2018 7:42:49 AM PST Lionel Landwerlin wrote:
> We already have the same function in brw_queryobj.c
> 
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 13 ++---
>  1 file changed, 2 insertions(+), 11 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index a084b30fe7c..d0faf4a2cb2 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -613,15 +613,6 @@ drop_from_unaccumulated_query_list(struct brw_context 
> *brw,
> reap_old_sample_buffers(brw);
>  }
>  
> -static uint64_t
> -timebase_scale(struct brw_context *brw, uint32_t u32_time_delta)
> -{
> -   const struct gen_device_info *devinfo = >screen->devinfo;
> -   uint64_t tmp = ((uint64_t)u32_time_delta) * 10ull;
> -
> -   return tmp ? tmp / devinfo->timestamp_frequency : 0;
> -}

This function isn't quite the same...the one in brw_queryobj.c doesn't
have the ternary.  But, the ternary is checking the /numerator/ for
zero, which seems kinda pointless... 0 / timestamp_frequency == 0...

Patches 1-6 are:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] meson: Use system_has_kms_drm in default driver selection

Yeah, I guess. Do we know for certain they don't work on any of the BSDs? If not
then I guess this is fine, and the other BSD maintainers can submit patches to
fix it.

Reviewed-by: Dylan Baker 

Quoting Greg V (2018-03-08 11:23:02)
> On 3/8/2018 9:41 PM, Dylan Baker wrote:
> > Do vulkan drivers work on all of the BSDs?
> I don't think anyone tried them on anything other than FreeBSD, but why 
> add extra conditionals?
> Building them by default makes them more noticeable for e.g. package 
> maintainers :)


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] u_vbuf/translate: pass max_index into the set_buffer.

From: Dave Airlie 

This fixes a memory trashing crash (not the test) seen with
dEQP-GLES3.stress.draw.unaligned_data.random.203
on virgl.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/util/u_vbuf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
b/src/gallium/auxiliary/util/u_vbuf.c
index d30a702210..95d7990c6c 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -448,7 +448,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct 
translate_key *key,
  map -= (ptrdiff_t)vb->stride * min_index;
   }
 
-  tr->set_buffer(tr, i, map, vb->stride, ~0);
+  tr->set_buffer(tr, i, map, vb->stride, info->max_index);
}
 
/* Translate. */
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/56] anv/entrypoints: Generalize the string map a bit

On Thu, Mar 8, 2018 at 11:58 AM, Eric Anholt  wrote:

> Jason Ekstrand  writes:
>
> > On Thu, Mar 8, 2018 at 8:45 AM, Dylan Baker  wrote:
> >
> >> Quoting Jason Ekstrand (2018-03-07 20:22:51)
> >> > Yes, that is what happened.  That said, I wrote that patch in September
> and
> >> > you've had about 6 months to look at it.  The only particularly active
> >> Mesa
> >> > contributor who hasn't had access is Ilia.
> >>
> >> No, just no. Having a patch in a branch does not count, especially not
> in a
> >> closed branch. I have plenty of patches that have sat in branches for
> >> months,
> >> years even. You're saying it's okay for me to send them to the list and
> >> push
> >> them a couple hours later because I wrote them a long time ago?
> >
> >
> > No, that's not what I'm saying.  However, I think there's a difference
> > between a private branch that you've had sitting around for a while and a
> > mostly public branch that you've been pestering your coworkers to review
> > for the past 6 months and gotten zero takers.  Every single patch I sent
> > had been reviewed and many of them by multiple people.
> >
> > This is something that we as a community (and team) need to sort out.
> With
> > both hardware enabling and new extension work, we are working with
> > embargoes.  Sometimes large pieces of work go into enabling said hardware
> > and features.  This series was fairly small at 56 patches; If you look at
> > all of Vulkan 1.1, it's probably more like 500.  If we wait until it's
> > public to get code review, you may be looking at weeks or months before
> you
> > can land it.
> >
> > This problem is only getting worse now that the mesa project is getting
> > caught up on features.  It used to be that we could do basically
> everything
> > publicly because we were several whole GL versions behind and basically
> > zero feature work was embargoed.  The only people working with an embargo
> > were people doing hardware enabling and they were sending the patches out
> > months before the hardware was available to anyone so waiting a week or
> two
> > doesn't matter.  Now, basically everything we do that isn't refactoring
> or
> > optimization work has to happen behind closed doors.  It's unfortunate,
> but
> > it's also reality.
> >
> > How do we deal with that as an open-source community?  That's a good
> > question and one which I'm happy to discuss.  I'm not sure what the right
> > balance is here but the "it doesn't exist until it's public" model just
> > isn't fair to the people who are in the unfortunate circumstance of
> working
> > under an embargo.
> >
> > On Thu, Mar 8, 2018 at 10:37 AM, Michel Dänzer 
> wrote:
> >
> >> On 2018-03-08 06:10 PM, Dylan Baker wrote:
> >> >
> >> > When I was given commit access I was told that I should wait 24 hours
> >> > after sending patches unless they were trivial or fixed something
> >> > critical, ie, without them you can't compile or nothing works.
> >>
> >> FWIW, I think that's a good rule, and I follow it.
> >>
> >> If one doesn't wait for at least 24 hours, e.g. somebody living in a
> >> different timezone may not get a chance to send feedback before the
> >> patch is applied. So it's kind of implying one isn't interested in
> >> feedback from such people.
> >>
> >
> > I agree.  24 hours means one turn of the globe and pushing much faster
> than
> > that does sort-of imply that you don't care about that feedback.  In this
> > case, the only thing that's implied is that I don't care too much about
> > feedback from the 5% of the mesa community who doesn't have a Khronos
> > account.  Maybe that makes me a jerk, but I didn't think it did.
> >
> >
> >> > I know we've always given a lot of flexibility to vendor specific code
> >> > (i965 or nouveau), but you hope everyone can understand my frustration
> >> > with a 56 patch series that I sent review for 8 hours after it was
> >> > posted to the list and I got told "Oh, I merged that hours ago,
> >> > patches welcome."
> >>
> >> I can. I guess Jason got a bit carried away by the Vulkan 1.1
> excitement.
> >>
> >
> > Perhaps.  :-)  I do think that being there day-1 is important.  If
> nothing
> > else, it shows the rest of the graphics community (who already fears the
> > concept of open-source) that working in the open isn't going to cramp
> their
> > style.  If we can deliver full-featured and fully conformant Vulkan 1.1
> > drivers on day 1, then they can too.  I think that's an important message
> > for the open-source community to send.
>
> I completely agree here.
>
> We have git.  We can change code after it lands.  The value we as a
> project get from having day 1 support is huge, and the value of getting
> our python style polished before any patches land is... well, it doesn't
> even compare.
>
> Also, I feel that if the Vulkan driver implementors are happy with this
> Vulkan driver support code, then that trumps style

Re: [Mesa-dev] [PATCH 04/56] anv/entrypoints: Generalize the string map a bit

2018-03-08 Thread Eric Anholt

Jason Ekstrand  writes:

> On Thu, Mar 8, 2018 at 8:45 AM, Dylan Baker  wrote:
>
>> Quoting Jason Ekstrand (2018-03-07 20:22:51)
>> > Yes, that is what happened.  That said, I wrote that patch in September and
>> > you've had about 6 months to look at it.  The only particularly active
>> Mesa
>> > contributor who hasn't had access is Ilia.
>>
>> No, just no. Having a patch in a branch does not count, especially not in a
>> closed branch. I have plenty of patches that have sat in branches for
>> months,
>> years even. You're saying it's okay for me to send them to the list and
>> push
>> them a couple hours later because I wrote them a long time ago?
>
>
> No, that's not what I'm saying.  However, I think there's a difference
> between a private branch that you've had sitting around for a while and a
> mostly public branch that you've been pestering your coworkers to review
> for the past 6 months and gotten zero takers.  Every single patch I sent
> had been reviewed and many of them by multiple people.
>
> This is something that we as a community (and team) need to sort out.  With
> both hardware enabling and new extension work, we are working with
> embargoes.  Sometimes large pieces of work go into enabling said hardware
> and features.  This series was fairly small at 56 patches; If you look at
> all of Vulkan 1.1, it's probably more like 500.  If we wait until it's
> public to get code review, you may be looking at weeks or months before you
> can land it.
>
> This problem is only getting worse now that the mesa project is getting
> caught up on features.  It used to be that we could do basically everything
> publicly because we were several whole GL versions behind and basically
> zero feature work was embargoed.  The only people working with an embargo
> were people doing hardware enabling and they were sending the patches out
> months before the hardware was available to anyone so waiting a week or two
> doesn't matter.  Now, basically everything we do that isn't refactoring or
> optimization work has to happen behind closed doors.  It's unfortunate, but
> it's also reality.
>
> How do we deal with that as an open-source community?  That's a good
> question and one which I'm happy to discuss.  I'm not sure what the right
> balance is here but the "it doesn't exist until it's public" model just
> isn't fair to the people who are in the unfortunate circumstance of working
> under an embargo.
>
> On Thu, Mar 8, 2018 at 10:37 AM, Michel Dänzer  wrote:
>
>> On 2018-03-08 06:10 PM, Dylan Baker wrote:
>> >
>> > When I was given commit access I was told that I should wait 24 hours
>> > after sending patches unless they were trivial or fixed something
>> > critical, ie, without them you can't compile or nothing works.
>>
>> FWIW, I think that's a good rule, and I follow it.
>>
>> If one doesn't wait for at least 24 hours, e.g. somebody living in a
>> different timezone may not get a chance to send feedback before the
>> patch is applied. So it's kind of implying one isn't interested in
>> feedback from such people.
>>
>
> I agree.  24 hours means one turn of the globe and pushing much faster than
> that does sort-of imply that you don't care about that feedback.  In this
> case, the only thing that's implied is that I don't care too much about
> feedback from the 5% of the mesa community who doesn't have a Khronos
> account.  Maybe that makes me a jerk, but I didn't think it did.
>
>
>> > I know we've always given a lot of flexibility to vendor specific code
>> > (i965 or nouveau), but you hope everyone can understand my frustration
>> > with a 56 patch series that I sent review for 8 hours after it was
>> > posted to the list and I got told "Oh, I merged that hours ago,
>> > patches welcome."
>>
>> I can. I guess Jason got a bit carried away by the Vulkan 1.1 excitement.
>>
>
> Perhaps.  :-)  I do think that being there day-1 is important.  If nothing
> else, it shows the rest of the graphics community (who already fears the
> concept of open-source) that working in the open isn't going to cramp their
> style.  If we can deliver full-featured and fully conformant Vulkan 1.1
> drivers on day 1, then they can too.  I think that's an important message
> for the open-source community to send.

I completely agree here.

We have git.  We can change code after it lands.  The value we as a
project get from having day 1 support is huge, and the value of getting
our python style polished before any patches land is... well, it doesn't
even compare.

Also, I feel that if the Vulkan driver implementors are happy with this
Vulkan driver support code, then that trumps style requests from others.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/5] radeonsi: remove chip_class parameter from si_lower_nir

On Wed, Mar 7, 2018 at 3:34 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> We can get it from si_screen.

Acked-by: Alex Deucher 

> ---
>  src/gallium/drivers/radeonsi/si_compute.c   | 3 +--
>  src/gallium/drivers/radeonsi/si_shader.h| 4 +---
>  src/gallium/drivers/radeonsi/si_shader_nir.c| 6 +++---
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 3 +--
>  4 files changed, 6 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 92d4514..46873cc 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -100,21 +100,21 @@ static void si_create_compute_state_async(void *job, 
> int thread_index)
> sel.screen = program->screen;
>
> if (program->ir_type == PIPE_SHADER_IR_TGSI) {
> tgsi_scan_shader(program->ir.tgsi, );
> sel.tokens = program->ir.tgsi;
> } else {
> assert(program->ir_type == PIPE_SHADER_IR_NIR);
> sel.nir = program->ir.nir;
>
> si_nir_scan_shader(sel.nir, );
> -   si_lower_nir(, program->compiler_ctx_state.chip_class);
> +   si_lower_nir();
> }
>
>
> sel.type = PIPE_SHADER_COMPUTE;
> sel.local_size = program->local_size;
> si_get_active_slot_masks(,
>  >active_const_and_shader_buffers,
>  >active_samplers_and_images);
>
> program->shader.selector = 
> @@ -179,21 +179,20 @@ static void *si_create_compute_state(
> program->ir.tgsi = tgsi_dup_tokens(cso->prog);
> if (!program->ir.tgsi) {
> FREE(program);
> return NULL;
> }
> } else {
> assert(cso->ir_type == PIPE_SHADER_IR_NIR);
> program->ir.nir = (struct nir_shader *) cso->prog;
> }
>
> -   program->compiler_ctx_state.chip_class = sctx->b.chip_class;
> program->compiler_ctx_state.debug = sctx->debug;
> program->compiler_ctx_state.is_debug_context = sctx->is_debug;
> p_atomic_inc(>num_shaders_created);
> util_queue_fence_init(>ready);
>
> struct util_async_debug_callback async_debug;
> bool wait =
> (sctx->debug.debug_message && !sctx->debug.async) ||
> sctx->is_debug ||
> si_can_dump_shader(sscreen, PIPE_SHADER_COMPUTE);
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
> b/src/gallium/drivers/radeonsi/si_shader.h
> index 23f9d20..f589789 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -300,22 +300,20 @@ enum {
> SI_FIX_FETCH_RGB_8, /* A = 1.0 */
> SI_FIX_FETCH_RGB_8_INT, /* A = 1 */
> SI_FIX_FETCH_RGB_16,
> SI_FIX_FETCH_RGB_16_INT,
>  };
>
>  struct si_shader;
>
>  /* State of the context creating the shader object. */
>  struct si_compiler_ctx_state {
> -   enum chip_class chip_class;
> -
> /* Should only be used by si_init_shader_selector_async and
>  * si_build_shader_variant if thread_index == -1 (non-threaded). */
> LLVMTargetMachineReftm;
>
> /* Used if thread_index == -1 or if debug.async is true. */
> struct pipe_debug_callback  debug;
>
> /* Used for creating the log string for gallium/ddebug. */
> boolis_debug_context;
>  };
> @@ -667,21 +665,21 @@ void si_shader_binary_read_config(struct 
> ac_shader_binary *binary,
>   struct si_shader_config *conf,
>   unsigned symbol_offset);
>  const char *si_get_shader_name(const struct si_shader *shader, unsigned 
> processor);
>
>  /* si_shader_nir.c */
>  void si_nir_scan_shader(const struct nir_shader *nir,
> struct tgsi_shader_info *info);
>  void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
>const struct tgsi_shader_info *info,
>struct tgsi_tessctrl_info *out);
> -void si_lower_nir(struct si_shader_selector *sel, enum chip_class 
> chip_class);
> +void si_lower_nir(struct si_shader_selector *sel);
>
>  /* Inline helpers. */
>
>  /* Return the pointer to the main shader part's pointer. */
>  static inline struct si_shader **
>  si_get_main_shader_part(struct si_shader_selector *sel,
> struct si_shader_key *key)
>  {
> if (key->as_ls)
> return >main_shader_part_ls;
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
>

Re: [Mesa-dev] [PATCH 2/3] meson: use relative paths in megadriver symlinks

Quoting Dylan Baker (2018-03-08 11:25:20)
> Quoting Greg V (2018-03-06 11:16:04)
> > e.g. libvdpau_radeonsi.so(.1(.0)) were pointing to the absolute
> > build-time path of libvdpau_radeonsi.so.1.0.0, which caused trouble
> > when packaging the libraries.
> > ---
> >  bin/install_megadrivers.py | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/bin/install_megadrivers.py b/bin/install_megadrivers.py
> > index 86bfa35918..ce947b4332 100755
> > --- a/bin/install_megadrivers.py
> > +++ b/bin/install_megadrivers.py
> > @@ -58,7 +58,7 @@ def main():
> >  while ext != '.so':
> >  if os.path.exists(name):
> >  os.unlink(name)
> > -os.symlink(driver, name)
> > +os.symlink(os.path.relpath(driver), name)
> 
> I think that driver is wrong here, I think that this should be
> `os.symlink(each, name)`
> 
> In my testing these generated the same code.

By "these" I mean os.path.relpath(driver) and each

> 
> Dylan
> 
> >  name, ext = os.path.splitext(name)
> >  finally:
> >  os.chdir(ret)
> > -- 
> > 2.16.2
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] meson: Use system_has_kms_drm in default driver selection

2018-03-08 Thread Greg V


On 3/8/2018 9:41 PM, Dylan Baker wrote:

Do vulkan drivers work on all of the BSDs?
I don't think anyone tried them on anything other than FreeBSD, but why 
add extra conditionals?
Building them by default makes them more noticeable for e.g. package 
maintainers :)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/7] ac: handle subgroup intrinsics

2018-03-08 Thread Michael Schellenberger Costa


Hi Daniel,


Am 08.03.2018 um 18:10 schrieb Daniel Schürmann:

---
  src/amd/common/ac_nir_to_llvm.c | 66 +++--
  1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9b85069860..0f4cc32f15 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4363,36 +4363,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
result = ac_build_ballot(>ac, get_src(ctx, instr->src[0]));
break;
case nir_intrinsic_read_invocation:
-   case nir_intrinsic_read_first_invocation: {
-   LLVMValueRef args[2];
-
-   /* Value */
-   args[0] = get_src(ctx, instr->src[0]);
-
-   unsigned num_args;
-   const char *intr_name;
-   if (instr->intrinsic == nir_intrinsic_read_invocation) {
-   num_args = 2;
-   intr_name = "llvm.amdgcn.readlane";
-
-   /* Invocation */
-   args[1] = get_src(ctx, instr->src[1]);
-   } else {
-   num_args = 1;
-   intr_name = "llvm.amdgcn.readfirstlane";
-   }
-
-   /* We currently have no other way to prevent LLVM from lifting 
the icmp
-* calls to a dominating basic block.
-*/
-   ac_build_optimization_barrier(>ac, [0]);
-
-   result = ac_build_intrinsic(>ac, intr_name,
-   ctx->ac.i32, args, num_args,
-   AC_FUNC_ATTR_READNONE |
-   AC_FUNC_ATTR_CONVERGENT);
+   result = ac_build_readlane(>ac, get_src(ctx, 
instr->src[0]),
+   get_src(ctx, instr->src[1]));
+   break;
+   case nir_intrinsic_read_first_invocation:
+   result = ac_build_readlane(>ac, get_src(ctx, 
instr->src[0]), NULL);
break;
-   }
case nir_intrinsic_load_subgroup_invocation:
result = ac_get_thread_id(>ac);
break;
@@ -4646,6 +4622,38 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
break;
}
+   case nir_intrinsic_shuffle:
+   result = ac_build_shuffle(>ac, get_src(ctx, instr->src[0]),
+   get_src(ctx, instr->src[1]));
+   break;
+   case nir_intrinsic_reduce:
+   result = ac_build_reduce(>ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0],
Indentation here and below is off. It took me a second look to see that
instr->const_index[0] is just the argument.

--Michael

+   instr->const_index[1]);
+   break;
+   case nir_intrinsic_inclusive_scan:
+   result = ac_build_inclusive_scan(>ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0]);
+   break;
+   case nir_intrinsic_exclusive_scan:
+   result = ac_build_exclusive_scan(>ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0]);
+   break;
+   case nir_intrinsic_quad_broadcast: {
+   unsigned lane = instr->const_index[0];
+   result = ac_build_quad_swizzle(>ac, get_src(ctx, 
instr->src[0]),
+   lane, lane, lane, lane);
+   break;
+   }
+   case nir_intrinsic_quad_swap_horizontal:
+   case nir_intrinsic_quad_swap_vertical:
+   case nir_intrinsic_quad_swap_diagonal:
+   result = ac_build_quad_swap(>ac, get_src(ctx, 
instr->src[0]),
+   instr->const_index[0]);
+   break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(>instr, stderr);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/7] ac: lower 64bit subgroup intrinsics

2018-03-08 Thread Michael Schellenberger Costa


Hi Daniel,


Am 08.03.2018 um 18:10 schrieb Daniel Schürmann:

---
  src/amd/common/ac_lower_subgroups.c | 50 ++---
  1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_lower_subgroups.c 
b/src/amd/common/ac_lower_subgroups.c
index d0782b481b..2be48e2ba1 100644
--- a/src/amd/common/ac_lower_subgroups.c
+++ b/src/amd/common/ac_lower_subgroups.c
@@ -26,9 +26,45 @@
  
  #include "ac_nir_to_llvm.h"
  
+static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, nir_intrinsic_instr *intrin) {

+   assert(intrin->src[0].ssa->bit_size == 64);
+   nir_ssa_def * x = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
+   nir_ssa_def * y = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

The extra space before x/y looks wrong.

+   nir_intrinsic_instr *intr_x = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_intrinsic_instr *intr_y = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_ssa_dest_init(_x->instr, _x->dest, 1, 32, NULL);
+   nir_ssa_dest_init(_y->instr, _y->dest, 1, 32, NULL);
+   intr_x->src[0] = nir_src_for_ssa(x);
+   intr_y->src[0] = nir_src_for_ssa(y);
+   intr_x->const_index[0] = intr_y->const_index[0] = 
intrin->const_index[0];
+   intr_x->const_index[1] = intr_y->const_index[1] = 
intrin->const_index[1];
+   if (intrin->intrinsic == nir_intrinsic_read_invocation ||
+   intrin->intrinsic == nir_intrinsic_shuffle ||
+   intrin->intrinsic == nir_intrinsic_quad_broadcast) {

Indentation is off for the other conditions.

+   nir_src_copy(_x->src[1], >src[1], intr_x);
+   nir_src_copy(_y->src[1], >src[1], intr_y);
+   }
+   intr_x->num_components = 1;
+   intr_y->num_components = 1;
+   nir_builder_instr_insert(b, _x->instr);
+   nir_builder_instr_insert(b, _y->instr);
+   return nir_pack_64_2x32_split(b, _x->dest.ssa, _y->dest.ssa);
+}


That said, could you make a helper function:

static nir_intrinsic_instr 
*ac_lower_subgroups_64bit_split_intrinsic(nir_builder *b, nir_intrinsic_instr 
*intrin, unsigned int component) {
nir_ssa_def *comp;
if (component == 0)
comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
else
    comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
intr->src[0] = nir_src_for_ssa(comp);

intr->const_index[0] = intrin->const_index[0];
intr->const_index[1] = intrin->const_index[1];
if (intrin->intrinsic == nir_intrinsic_read_invocation ||
intrin->intrinsic == nir_intrinsic_shuffle ||
intrin->intrinsic == nir_intrinsic_quad_broadcast) {
nir_src_copy(>src[1], >src[1], intr);
}
intr->num_components = 1;
return intr;
}

And then simplify into:

static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, 
nir_intrinsic_instr *intrin) {
assert(intrin->src[0].ssa->bit_size == 64);
nir_intrinsic_instr *intr_x = 
ac_lower_subgroups_64bit_split_intrinsic(b, intrin, 0);
nir_intrinsic_instr *intr_y = 
ac_lower_subgroups_64bit_split_intrinsic(b, intrin, 1);

nir_builder_instr_insert(b, _x->instr);
nir_builder_instr_insert(b, _y->instr);
return nir_pack_64_2x32_split(b, _x->dest.ssa, _y->dest.ssa);
}

--Michael


+
  static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, 
nir_intrinsic_instr *intrin)
  {
switch(intrin->intrinsic) {
+   case nir_intrinsic_read_invocation:
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_shuffle:
+   case nir_intrinsic_quad_broadcast:
+   case nir_intrinsic_quad_swap_horizontal:
+   case nir_intrinsic_quad_swap_vertical:
+   case nir_intrinsic_quad_swap_diagonal:
+   if (intrin->src[0].ssa->bit_size == 64)
+   return ac_lower_subgroups_64bit(b, intrin);
+   else
+   return NULL;
case nir_intrinsic_vote_ieq:
case nir_intrinsic_vote_feq: {
nir_intrinsic_instr *rfi =
@@ -37,12 +73,18 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder 
*b, nir_intrinsic_inst
  1, intrin->src[0].ssa->bit_size, NULL);
nir_src_copy(>src[0], >src[0], rfi);
rfi->num_components = 1;
-
+   nir_ssa_def *first_lane;
+   if (intrin->src[0].ssa->bit_size == 64) {
+   first_lane = ac_lower_subgroups_64bit(b, rfi);
+   } else {
+   nir_builder_instr_insert(b, >instr);
+   first_lane = >dest.ssa;
+   }
nir_ssa_def *is_ne;
if (intrin->intrinsic ==

Re: [Mesa-dev] [PATCH 2/3] meson: use relative paths in megadriver symlinks

Quoting Greg V (2018-03-06 11:16:04)
> e.g. libvdpau_radeonsi.so(.1(.0)) were pointing to the absolute
> build-time path of libvdpau_radeonsi.so.1.0.0, which caused trouble
> when packaging the libraries.
> ---
>  bin/install_megadrivers.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/bin/install_megadrivers.py b/bin/install_megadrivers.py
> index 86bfa35918..ce947b4332 100755
> --- a/bin/install_megadrivers.py
> +++ b/bin/install_megadrivers.py
> @@ -58,7 +58,7 @@ def main():
>  while ext != '.so':
>  if os.path.exists(name):
>  os.unlink(name)
> -os.symlink(driver, name)
> +os.symlink(os.path.relpath(driver), name)

I think that driver is wrong here, I think that this should be
`os.symlink(each, name)`

In my testing these generated the same code.

Dylan

>  name, ext = os.path.splitext(name)
>  finally:
>  os.chdir(ret)
> -- 
> 2.16.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] autotools: include all meson.build files

2018-03-08 Thread Kyriazis, George

Reviewed-By: George Kyriazis 
>

On Mar 2, 2018, at 12:00 PM, Dylan Baker 
> wrote:

Otherwise SWR cannot be built with meson from an autotools generated
tarball, such as the 18.0.0-rc4 tarball.

CC: George Kyriazis 
>
CC: Emil Velikov >
Fixes: 16bf81383080 ("meson/swr: re-shuffle generated files")
Signed-off-by: Dylan Baker 
>
---
src/gallium/drivers/swr/Makefile.am | 2 ++
1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 2edaf666f11..13c7f8b7345 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -364,7 +364,9 @@ endif
EXTRA_DIST = \
SConscript \
meson.build \
+ rasterizer/jitter/meson.build \
rasterizer/codegen/meson.build \
+ rasterizer/core/backends/meson.build \
rasterizer/archrast/events.proto \
rasterizer/codegen/gen_llvm_ir_macros.py \
rasterizer/codegen/gen_llvm_types.py \
--
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/dri: fix OpenGL-OpenCL interop for GL_TEXTURE_BUFFER

On Wed, Mar 7, 2018 at 3:35 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> Tested by our OpenCL team.
>
> Fixes: 9c499e6759b26c5e "st/mesa: don't invoke st_finalize_texture & 
> st_convert_sampler for TBOs"

Acked-by: Alex Deucher 

> ---
>  src/gallium/state_trackers/dri/dri2.c | 58 
> ---
>  1 file changed, 34 insertions(+), 24 deletions(-)
>
> diff --git a/src/gallium/state_trackers/dri/dri2.c 
> b/src/gallium/state_trackers/dri/dri2.c
> index 2a3a2a8..31d17d4 100644
> --- a/src/gallium/state_trackers/dri/dri2.c
> +++ b/src/gallium/state_trackers/dri/dri2.c
> @@ -1870,53 +1870,63 @@ dri2_interop_export_object(__DRIcontext *_ctx,
> *miplevel is zero or if the GL texture object is incomplete."
> */
>if (!obj ||
>obj->Target != target ||
>!obj->_BaseComplete ||
>(in->miplevel > 0 && !obj->_MipmapComplete)) {
>   simple_mtx_unlock(>Shared->Mutex);
>   return MESA_GLINTEROP_INVALID_OBJECT;
>}
>
> -  /* From OpenCL 2.0 SDK, clCreateFromGLTexture:
> -   *   "CL_INVALID_MIP_LEVEL if miplevel is less than the value of
> -   *levelbase (for OpenGL implementations) or zero (for OpenGL ES
> -   *implementations); or greater than the value of q (for both OpenGL
> -   *and OpenGL ES). levelbase and q are defined for the texture in
> -   *section 3.8.10 (Texture Completeness) of the OpenGL 2.1
> -   *specification and section 3.7.10 of the OpenGL ES 2.0."
> -   */
> -  if (in->miplevel < obj->BaseLevel || in->miplevel > obj->_MaxLevel) {
> - simple_mtx_unlock(>Shared->Mutex);
> - return MESA_GLINTEROP_INVALID_MIP_LEVEL;
> -  }
> -
> -  if (!st_finalize_texture(ctx, st->pipe, obj, 0)) {
> - simple_mtx_unlock(>Shared->Mutex);
> - return MESA_GLINTEROP_OUT_OF_RESOURCES;
> -  }
> +  if (target == GL_TEXTURE_BUFFER) {
> + struct st_buffer_object *stBuf =
> +st_buffer_object(obj->BufferObject);
>
> -  res = st_get_texobj_resource(obj);
> -  if (!res) {
> - /* Incomplete texture buffer object? This shouldn't really occur. */
> - simple_mtx_unlock(>Shared->Mutex);
> - return MESA_GLINTEROP_INVALID_OBJECT;
> -  }
> + if (!stBuf || !stBuf->buffer) {
> +/* this shouldn't happen */
> +simple_mtx_unlock(>Shared->Mutex);
> +return MESA_GLINTEROP_INVALID_OBJECT;
> + }
> + res = stBuf->buffer;
>
> -  if (target == GL_TEXTURE_BUFFER) {
>   out->internal_format = obj->BufferObjectFormat;
>   out->buf_offset = obj->BufferOffset;
>   out->buf_size = obj->BufferSize == -1 ? obj->BufferObject->Size :
>   obj->BufferSize;
>
>   obj->BufferObject->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
>} else {
> + /* From OpenCL 2.0 SDK, clCreateFromGLTexture:
> +  *   "CL_INVALID_MIP_LEVEL if miplevel is less than the value of
> +  *levelbase (for OpenGL implementations) or zero (for OpenGL ES
> +  *implementations); or greater than the value of q (for both 
> OpenGL
> +  *and OpenGL ES). levelbase and q are defined for the texture in
> +  *section 3.8.10 (Texture Completeness) of the OpenGL 2.1
> +  *specification and section 3.7.10 of the OpenGL ES 2.0."
> +  */
> + if (in->miplevel < obj->BaseLevel || in->miplevel > obj->_MaxLevel) 
> {
> +simple_mtx_unlock(>Shared->Mutex);
> +return MESA_GLINTEROP_INVALID_MIP_LEVEL;
> + }
> +
> + if (!st_finalize_texture(ctx, st->pipe, obj, 0)) {
> +simple_mtx_unlock(>Shared->Mutex);
> +return MESA_GLINTEROP_OUT_OF_RESOURCES;
> + }
> +
> + res = st_get_texobj_resource(obj);
> + if (!res) {
> +/* Incomplete texture buffer object? This shouldn't really 
> occur. */
> +simple_mtx_unlock(>Shared->Mutex);
> +return MESA_GLINTEROP_INVALID_OBJECT;
> + }
> +
>   out->internal_format = obj->Image[0][0]->InternalFormat;
>   out->view_minlevel = obj->MinLevel;
>   out->view_numlevels = obj->NumLevels;
>   out->view_minlayer = obj->MinLayer;
>   out->view_numlayers = obj->NumLayers;
>}
> }
>
> /* Get the handle. */
> switch (in->access) {
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/5] winsys/amdgpu: query GDS info

On Wed, Mar 7, 2018 at 3:34 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>

Reviewed-by: Alex Deucher 

> ---
>  src/amd/common/ac_gpu_info.c | 11 +++
>  src/amd/common/ac_gpu_info.h |  2 ++
>  2 files changed, 13 insertions(+)
>
> diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
> index 7c13e5f..29e2aa8 100644
> --- a/src/amd/common/ac_gpu_info.c
> +++ b/src/amd/common/ac_gpu_info.c
> @@ -94,20 +94,21 @@ static bool has_syncobj(int fd)
>
>  bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
>struct radeon_info *info,
>struct amdgpu_gpu_info *amdinfo)
>  {
> struct amdgpu_buffer_size_alignments alignment_info = {};
> struct amdgpu_heap_info vram, vram_vis, gtt;
> struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {};
> struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {};
> struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {};
> +   struct amdgpu_gds_resource_info gds = {};
> uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, 
> uvd_feature = 0;
> int r, i, j;
> drmDevicePtr devinfo;
>
> /* Get PCI info. */
> r = drmGetDevice2(fd, 0, );
> if (r) {
> fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n");
> return false;
> }
> @@ -241,20 +242,26 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
> fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) 
> failed.\n");
> return false;
> }
>
> r = amdgpu_query_sw_info(dev, amdgpu_sw_info_address32_hi, 
> >address32_hi);
> if (r) {
> fprintf(stderr, "amdgpu: amdgpu_query_sw_info(address32_hi) 
> failed.\n");
> return false;
> }
>
> +   r = amdgpu_query_gds_info(dev, );
> +   if (r) {
> +   fprintf(stderr, "amdgpu: amdgpu_query_gds_info failed.\n");
> +   return false;
> +   }
> +
> /* Set chip identification. */
> info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
> info->vce_harvest_config = amdinfo->vce_harvest_config;
>
> switch (info->pci_id) {
>  #define CHIPSET(pci_id, cfamily) case pci_id: info->family = CHIP_##cfamily; 
> break;
>  #include "pci_ids/radeonsi_pci_ids.h"
>  #undef CHIPSET
>
> default:
> @@ -276,20 +283,22 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
> }
>
> /* Set which chips have dedicated VRAM. */
> info->has_dedicated_vram =
> !(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);
>
> /* Set hardware information. */
> info->gart_size = gtt.heap_size;
> info->vram_size = vram.heap_size;
> info->vram_vis_size = vram_vis.heap_size;
> +   info->gds_size = gds.gds_total_size;
> +   info->gds_gfx_partition_size = gds.gds_gfx_partition_size;
> /* The kernel can split large buffers in VRAM but not in GTT, so large
>  * allocations can fail or cause buffer movement failures in the 
> kernel.
>  */
> info->max_alloc_size = MIN2(info->vram_size * 0.9, info->gart_size * 
> 0.7);
> /* convert the shader clock from KHz to MHz */
> info->max_shader_clock = amdinfo->max_engine_clk / 1000;
> info->max_se = amdinfo->num_shader_engines;
> info->max_sh_per_se = amdinfo->num_shader_arrays_per_engine;
> info->has_hw_decode =
> (uvd.available_rings != 0) || (vcn_dec.available_rings != 0);
> @@ -396,20 +405,22 @@ void ac_print_gpu_info(struct radeon_info *info)
>info->pci_domain, info->pci_bus,
>info->pci_dev, info->pci_func);
> printf("pci_id = 0x%x\n", info->pci_id);
> printf("family = %i\n", info->family);
> printf("chip_class = %i\n", info->chip_class);
> printf("pte_fragment_size = %u\n", info->pte_fragment_size);
> printf("gart_page_size = %u\n", info->gart_page_size);
> printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size, 
> 1024*1024));
> printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 
> 1024*1024));
> printf("vram_vis_size = %i MB\n", 
> (int)DIV_ROUND_UP(info->vram_vis_size, 1024*1024));
> +   printf("gds_size = %u kB\n", info->gds_size / 1024);
> +   printf("gds_gfx_partition_size = %u kB\n", 
> info->gds_gfx_partition_size / 1024);
> printf("max_alloc_size = %i MB\n",
>(int)DIV_ROUND_UP(info->max_alloc_size, 1024*1024));
> printf("min_alloc_size = %u\n", info->min_alloc_size);
> printf("address32_hi = %u\n", info->address32_hi);
> printf("has_dedicated_vram = %u\n", info->has_dedicated_vram);
> printf("has_virtual_memory = %i\n", info->has_virtual_memory);
>

Re: [Mesa-dev] [PATCH] autotools: include all meson.build files

Ping.

Quoting Dylan Baker (2018-03-02 10:00:15)
> Otherwise SWR cannot be built with meson from an autotools generated
> tarball, such as the 18.0.0-rc4 tarball.
> 
> CC: George Kyriazis 
> CC: Emil Velikov 
> Fixes: 16bf81383080 ("meson/swr: re-shuffle generated files")
> Signed-off-by: Dylan Baker 
> ---
>  src/gallium/drivers/swr/Makefile.am | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/src/gallium/drivers/swr/Makefile.am 
> b/src/gallium/drivers/swr/Makefile.am
> index 2edaf666f11..13c7f8b7345 100644
> --- a/src/gallium/drivers/swr/Makefile.am
> +++ b/src/gallium/drivers/swr/Makefile.am
> @@ -364,7 +364,9 @@ endif
>  EXTRA_DIST = \
> SConscript \
> meson.build \
> +   rasterizer/jitter/meson.build \
> rasterizer/codegen/meson.build \
> +   rasterizer/core/backends/meson.build \
> rasterizer/archrast/events.proto \
> rasterizer/codegen/gen_llvm_ir_macros.py \
> rasterizer/codegen/gen_llvm_types.py \
> -- 
> 2.16.2
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/5] radeonsi: expand constbuf 0 address correctly to fix Vega10 hangs

On Wed, Mar 7, 2018 at 3:34 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> This is only required with the latest libdrm.
>
> This fixes 32-bit support with high addresses.
> (and possibly 64-bit support too because the high bits need to be masked out)

Acked-by: Alex Deucher 

> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 21 +
>  1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 343a5d5..e2da765 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2420,26 +2420,39 @@ static LLVMValueRef fetch_constant(
> addr = LLVMBuildLShr(ctx->ac.builder, addr, 
> LLVMConstInt(ctx->i32, 2, 0), "");
> LLVMValueRef result = 
> ac_build_load_invariant(>ac, ptr, addr);
> return bitcast(bld_base, type, result);
> }
>
> /* Do the bounds checking with a descriptor, because
>  * doing computation and manual bounds checking of 64-bit
>  * addresses generates horrible VALU code with very high
>  * VGPR usage and very low SIMD occupancy.
>  */
> -   ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->i64, "");
> -   ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
> +   ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, 
> "");
> +
> +   LLVMValueRef desc0, desc1;
> +   if (HAVE_32BIT_POINTERS) {
> +   desc0 = ptr;
> +   desc1 = LLVMConstInt(ctx->i32,
> +
> S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
> +   } else {
> +   ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, 
> ctx->v2i32, "");
> +   desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, 
> ctx->i32_0, "");
> +   desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, 
> ctx->i32_1, "");
> +   /* Mask out all bits except BASE_ADDRESS_HI. */
> +   desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
> +LLVMConstInt(ctx->i32, 
> ~C_008F04_BASE_ADDRESS_HI, 0), "");
> +   }
>
> LLVMValueRef desc_elems[] = {
> -   LLVMBuildExtractElement(ctx->ac.builder, ptr, 
> ctx->i32_0, ""),
> -   LLVMBuildExtractElement(ctx->ac.builder, ptr, 
> ctx->i32_1, ""),
> +   desc0,
> +   desc1,
> LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 
> 1) * 16, 0),
> LLVMConstInt(ctx->i32,
> S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> 
> S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
> 
> S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0)
> };
> LLVMValueRef desc = ac_build_gather_values(>ac, 
> desc_elems, 4);
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] meson: use relative paths in megadriver symlinks

Quoting Greg V (2018-03-06 11:16:04)
> e.g. libvdpau_radeonsi.so(.1(.0)) were pointing to the absolute
> build-time path of libvdpau_radeonsi.so.1.0.0, which caused trouble
> when packaging the libraries.
> ---
>  bin/install_megadrivers.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/bin/install_megadrivers.py b/bin/install_megadrivers.py
> index 86bfa35918..ce947b4332 100755
> --- a/bin/install_megadrivers.py
> +++ b/bin/install_megadrivers.py
> @@ -58,7 +58,7 @@ def main():
>  while ext != '.so':
>  if os.path.exists(name):
>  os.unlink(name)
> -os.symlink(driver, name)
> +os.symlink(os.path.relpath(driver), name)
>  name, ext = os.path.splitext(name)
>  finally:
>  os.chdir(ret)
> -- 
> 2.16.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

My gut is telling me this is wrong, but I don't know why yet. I'm looking
at it right now.


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/5] radeonsi: align command buffer starting address to fix some Raven hangs

On Wed, Mar 7, 2018 at 3:34 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> Cc: 17.3 18.0 

Reviewed-by: Alex Deucher 

> ---
>  src/amd/common/ac_gpu_info.c  | 21 -
>  src/amd/common/ac_gpu_info.h  |  1 +
>  src/gallium/drivers/radeonsi/si_pm4.c |  5 +++--
>  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c |  5 +++--
>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  1 +
>  5 files changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
> index 146098b..7c13e5f 100644
> --- a/src/amd/common/ac_gpu_info.c
> +++ b/src/amd/common/ac_gpu_info.c
> @@ -91,21 +91,23 @@ static bool has_syncobj(int fd)
> return false;
> return value ? true : false;
>  }
>
>  bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
>struct radeon_info *info,
>struct amdgpu_gpu_info *amdinfo)
>  {
> struct amdgpu_buffer_size_alignments alignment_info = {};
> struct amdgpu_heap_info vram, vram_vis, gtt;
> -   struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, 
> uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_enc = {};
> +   struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {};
> +   struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {};
> +   struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {};
> uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, 
> uvd_feature = 0;
> int r, i, j;
> drmDevicePtr devinfo;
>
> /* Get PCI info. */
> r = drmGetDevice2(fd, 0, );
> if (r) {
> fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n");
> return false;
> }
> @@ -147,20 +149,26 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
> fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) 
> failed.\n");
> return false;
> }
>
> r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_DMA, 0, );
> if (r) {
> fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) 
> failed.\n");
> return false;
> }
>
> +   r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, );
> +   if (r) {
> +   fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(gfx) 
> failed.\n");
> +   return false;
> +   }
> +
> r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_COMPUTE, 0, );
> if (r) {
> fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) 
> failed.\n");
> return false;
> }
>
> r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD, 0, );
> if (r) {
> fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd) 
> failed.\n");
> return false;
> @@ -333,20 +341,31 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
>
> memcpy(info->cik_macrotile_mode_array, amdinfo->gb_macro_tile_mode,
> sizeof(amdinfo->gb_macro_tile_mode));
>
> info->pte_fragment_size = alignment_info.size_local;
> info->gart_page_size = alignment_info.size_remote;
>
> if (info->chip_class == SI)
> info->gfx_ib_pad_with_type2 = TRUE;
>
> +   unsigned ib_align = 0;
> +   ib_align = MAX2(ib_align, gfx.ib_start_alignment);
> +   ib_align = MAX2(ib_align, compute.ib_start_alignment);
> +   ib_align = MAX2(ib_align, dma.ib_start_alignment);
> +   ib_align = MAX2(ib_align, uvd.ib_start_alignment);
> +   ib_align = MAX2(ib_align, uvd_enc.ib_start_alignment);
> +   ib_align = MAX2(ib_align, vce.ib_start_alignment);
> +   ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment);
> +   ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment);
> +   info->ib_start_alignment = ib_align;
> +
> return true;
>  }
>
>  void ac_compute_driver_uuid(char *uuid, size_t size)
>  {
> char amd_uuid[] = "AMD-MESA-DRV";
>
> assert(size >= sizeof(amd_uuid));
>
> memset(uuid, 0, size);
> diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
> index 7c86dc1..0beba96 100644
> --- a/src/amd/common/ac_gpu_info.h
> +++ b/src/amd/common/ac_gpu_info.h
> @@ -55,20 +55,21 @@ struct radeon_info {
> uint64_tgart_size;
> uint64_tvram_size;
> uint64_tvram_vis_size;
> uint64_tmax_alloc_size;
> uint32_tmin_alloc_size;
> uint32_taddress32_hi;
> boolhas_dedicated_vram;
> boolhas_virtual_memory;
> boolgfx_ib_pad_with_type2;
> bool

Re: [Mesa-dev] [PATCH 3/5] winsys/amdgpu: pad compute rings

On Thu, Mar 8, 2018 at 11:51 AM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> v2: pad with PKT2 NOPs on SI

Reviewed-by: Alex Deucher 

> ---
>  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
> b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index d9a95c0..a3feeb9 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -1521,29 +1521,31 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs 
> *rcs,
>/* pad DMA ring to 8 DWs */
>if (ws->info.chip_class <= SI) {
>   while (rcs->current.cdw & 7)
>  radeon_emit(rcs, 0xf000); /* NOP packet */
>} else {
>   while (rcs->current.cdw & 7)
>  radeon_emit(rcs, 0x); /* NOP packet */
>}
>break;
> case RING_GFX:
> +   case RING_COMPUTE:
>/* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
>if (ws->info.gfx_ib_pad_with_type2) {
>   while (rcs->current.cdw & 7)
>  radeon_emit(rcs, 0x8000); /* type2 nop packet */
>} else {
>   while (rcs->current.cdw & 7)
>  radeon_emit(rcs, 0x1000); /* type3 nop packet */
>}
> -  ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
> +  if (cs->ring_type == RING_GFX)
> + ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
>break;
> case RING_UVD:
> case RING_UVD_ENC:
>while (rcs->current.cdw & 15)
>   radeon_emit(rcs, 0x8000); /* type2 nop packet */
>break;
> case RING_VCN_DEC:
>while (rcs->current.cdw & 15)
>   radeon_emit(rcs, 0x81ff); /* nop packet */
>break;
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/56] anv/entrypoints: Generalize the string map a bit

On Thu, Mar 8, 2018 at 8:45 AM, Dylan Baker  wrote:

> Quoting Jason Ekstrand (2018-03-07 20:22:51)
> > Yes, that is what happened.  That said, I wrote that patch in September and
> > you've had about 6 months to look at it.  The only particularly active
> Mesa
> > contributor who hasn't had access is Ilia.
>
> No, just no. Having a patch in a branch does not count, especially not in a
> closed branch. I have plenty of patches that have sat in branches for
> months,
> years even. You're saying it's okay for me to send them to the list and
> push
> them a couple hours later because I wrote them a long time ago?

No, that's not what I'm saying.  However, I think there's a difference
between a private branch that you've had sitting around for a while and a
mostly public branch that you've been pestering your coworkers to review
for the past 6 months and gotten zero takers.  Every single patch I sent
had been reviewed and many of them by multiple people.

This is something that we as a community (and team) need to sort out.  With
both hardware enabling and new extension work, we are working with
embargoes.  Sometimes large pieces of work go into enabling said hardware
and features.  This series was fairly small at 56 patches; if you look at
all of Vulkan 1.1, it's probably more like 500.  If we wait until it's
public to get code review, you may be looking at weeks or months before you
can land it.

This problem is only getting worse now that the mesa project is getting
caught up on features.  It used to be that we could do basically everything
publicly because we were several whole GL versions behind and basically
zero feature work was embargoed.  The only people working with an embargo
were people doing hardware enabling and they were sending the patches out
months before the hardware was available to anyone so waiting a week or two
doesn't matter.  Now, basically everything we do that isn't refactoring or
optimization work has to happen behind closed doors.  It's unfortunate, but
it's also reality.

How do we deal with that as an open-source community?  That's a good
question and one which I'm happy to discuss.  I'm not sure what the right
balance is here but the "it doesn't exist until it's public" model just
isn't fair to the people who are in the unfortunate circumstance of working
under an embargo.

On Thu, Mar 8, 2018 at 10:37 AM, Michel Dänzer  wrote:

> On 2018-03-08 06:10 PM, Dylan Baker wrote:
> >
> > When I was given commit access I was told that I should wait 24 hours
> > after sending patches unless they were trivial or fixed something
> > critical, ie, without them you can't compile or nothing works.
>
> FWIW, I think that's a good rule, and I follow it.
>
> If one doesn't wait for at least 24 hours, e.g. somebody living in a
> different timezone may not get a chance to send feedback before the
> patch is applied. So it's kind of implying one isn't interested in
> feedback from such people.
>

I agree.  24 hours means one turn of the globe and pushing much faster than
that does sort-of imply that you don't care about that feedback.  In this
case, the only thing that's implied is that I don't care too much about
feedback from the 5% of the mesa community who doesn't have a Khronos
account.  Maybe that makes me a jerk, but I didn't think it did.

> > I know we've always given a lot of flexibility to vendor specific code
> > (i965 or nouveau), but I hope everyone can understand my frustration
> > with a 56 patch series that I sent review for 8 hours after it was
> > posted to the list and I got told "Oh, I merged that hours ago,
> > patches welcome."
>
> I can. I guess Jason got a bit carried away by the Vulkan 1.1 excitement.
>

Perhaps.  :-)  I do think that being there day-1 is important.  If nothing
else, it shows the rest of the graphics community (who already fears the
concept of open-source) that working in the open isn't going to cramp their
style.  If we can deliver full-featured and fully conformant Vulkan 1.1
drivers on day 1, then they can too.  I think that's an important message
for the open-source community to send.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] meson: use relative paths in megadriver symlinks

On 6 March 2018 at 19:16, Greg V  wrote:
> e.g. libvdpau_radeonsi.so(.1(.0)) were pointing to the absolute
> build-time path of libvdpau_radeonsi.so.1.0.0, which caused trouble
> when packaging the libraries.

Patch looks correct, although CC-ing Dylan as our resident python expert.

Fixes: f7f1b30f81e ("meson: extend install_megadrivers script to
handle symmlinking")
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] meson: Use system_has_kms_drm in default driver selection

Quoting Greg V (2018-03-06 11:16:03)
> ---
>  meson.build | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index d68460231c..e71f4ddd73 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -87,6 +87,8 @@ if (with_gles1 or with_gles2) and not with_opengl
>error('building OpenGL ES without OpenGL is not supported.')
>  endif
>  
> +system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'dragonfly', 
> 'linux'].contains(host_machine.system())
> +
>  with_dri = false
>  with_dri_i915 = false
>  with_dri_i965 = false
> @@ -96,7 +98,7 @@ with_dri_nouveau = false
>  with_dri_swrast = false
>  _drivers = get_option('dri-drivers')
>  if _drivers == 'auto'
> -  if host_machine.system() == 'linux'
> +  if system_has_kms_drm
>  # TODO: PPC, Sparc
>  if ['x86', 'x86_64'].contains(host_machine.cpu_family())
>_drivers = 'i915,i965,r100,r200,nouveau'
> @@ -139,7 +141,7 @@ with_gallium_virgl = false
>  with_gallium_swr = false
>  _drivers = get_option('gallium-drivers')
>  if _drivers == 'auto'
> -  if host_machine.system() == 'linux'
> +  if system_has_kms_drm
>  # TODO: PPC, Sparc
>  if ['x86', 'x86_64'].contains(host_machine.cpu_family())
>_drivers = 'r300,r600,radeonsi,nouveau,virgl,svga,swrast'
> @@ -179,7 +181,7 @@ with_amd_vk = false
>  with_any_vk = false
>  _vulkan_drivers = get_option('vulkan-drivers')
>  if _vulkan_drivers == 'auto'
> -  if host_machine.system() == 'linux'
> +  if system_has_kms_drm

Do vulkan drivers work on all of the BSDs?

>  if host_machine.cpu_family().startswith('x86')
>_vulkan_drivers = 'amd,intel'
>  else
> @@ -217,8 +219,6 @@ if with_dri_i915 or with_gallium_i915
>dep_libdrm_intel = dependency('libdrm_intel', version : '>= 2.4.75')
>  endif
>  
> -system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'dragonfly', 
> 'linux'].contains(host_machine.system())
> -
>  if host_machine.system() == 'darwin'
>with_dri_platform = 'apple'
>  elif ['windows', 'cygwin'].contains(host_machine.system())
> -- 
> 2.16.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] meson: make GLX_USE_TLS optional

Quoting Eric Anholt (2018-03-08 10:12:02)
> Greg V  writes:
> 
> > FreeBSD builds Mesa with --disable-glx-tls in autotools because:
> > https://github.com/dumbbell/test-tls-initial-exec
> >
> > Add the equivalent option to Meson.
> > ---
> >  meson.build   | 5 -
> >  meson_options.txt | 6 ++
> >  2 files changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/meson.build b/meson.build
> > index e71f4ddd73..1c4293d464 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -329,7 +329,10 @@ if with_egl and not (with_platform_drm or 
> > with_platform_surfaceless)
> >endif
> >  endif
> >  
> > -pre_args += '-DGLX_USE_TLS'
> > +if get_option('glx-tls')
> > +  pre_args += '-DGLX_USE_TLS'
> > +endif
> 
> Instead of introducing an option, could we just test
> host_machine.system() for freebsd here, so that nobody on any OS can
> choose the wrong value?
> 

I third this, this was always my plan for the meson build if someone needed to
turn off GLX_USE_TLS.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/56] anv/entrypoints: Generalize the string map a bit

2018-03-08 Thread Michel Dänzer

On 2018-03-08 06:10 PM, Dylan Baker wrote:
> 
> When I was given commit access I was told that I should wait 24 hours
> after sending patches unless they were trivial or fixed something
> critical, ie, without them you can't compile or nothing works.

FWIW, I think that's a good rule, and I follow it.

If one doesn't wait for at least 24 hours, e.g. somebody living in a
different timezone may not get a chance to send feedback before the
patch is applied. So it's kind of implying one isn't interested in
feedback from such people.


> I know we've always given a lot of flexibility to vendor specific code
> (i965 or nouveau), but I hope everyone can understand my frustration
> with a 56 patch series that I sent review for 8 hours after it was
> posted to the list and I got told "Oh, I merged that hours ago,
> patches welcome." 

I can. I guess Jason got a bit carried away by the Vulkan 1.1 excitement.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] meson: make GLX_USE_TLS optional

On 8 March 2018 at 18:12, Eric Anholt  wrote:
> Greg V  writes:
>
>> FreeBSD builds Mesa with --disable-glx-tls in autotools because:
>> https://github.com/dumbbell/test-tls-initial-exec
>>
>> Add the equivalent option to Meson.
>> ---
>>  meson.build   | 5 -
>>  meson_options.txt | 6 ++
>>  2 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/meson.build b/meson.build
>> index e71f4ddd73..1c4293d464 100644
>> --- a/meson.build
>> +++ b/meson.build
>> @@ -329,7 +329,10 @@ if with_egl and not (with_platform_drm or 
>> with_platform_surfaceless)
>>endif
>>  endif
>>
>> -pre_args += '-DGLX_USE_TLS'
>> +if get_option('glx-tls')
>> +  pre_args += '-DGLX_USE_TLS'
>> +endif
>
> Instead of introducing an option, could we just test
> host_machine.system() for freebsd here, so that nobody on any OS can
> choose the wrong value?
>
This please. Other platforms also have this problem but it's something
they should sit down and address.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa] vulkan/wsi: clean up cleanup path

On 26 February 2018 at 13:51, Eric Engestrom  wrote:
> Cc: Keith Packard 
> Signed-off-by: Eric Engestrom 
> ---
> This will make Keith's addition much cleaner as well :)

Hey, every little helps. Fwiw
Reviewed-by: Emil Velikov 

Another idea - make the wsi_*_{init,fini}_wsi kernel style stubs ;-)

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] meson: make GLX_USE_TLS optional

2018-03-08 Thread Eric Anholt

Greg V  writes:

> FreeBSD builds Mesa with --disable-glx-tls in autotools because:
> https://github.com/dumbbell/test-tls-initial-exec
>
> Add the equivalent option to Meson.
> ---
>  meson.build   | 5 -
>  meson_options.txt | 6 ++
>  2 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/meson.build b/meson.build
> index e71f4ddd73..1c4293d464 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -329,7 +329,10 @@ if with_egl and not (with_platform_drm or 
> with_platform_surfaceless)
>endif
>  endif
>  
> -pre_args += '-DGLX_USE_TLS'
> +if get_option('glx-tls')
> +  pre_args += '-DGLX_USE_TLS'
> +endif

Instead of introducing an option, could we just test
host_machine.system() for freebsd here, so that nobody on any OS can
choose the wrong value?


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa 01/21] vulkan: Add KHR_display extension using DRM [v4]

2018-03-08 Thread Keith Packard

Eric Engestrom  writes:

> I'm still slightly bothered by the build options inconsistency between
> autotools: reuse --with-platform=drm
> meson: new   -D platform=display
>
> I'd be happy with either one being used everywhere, but from what you
> said last time I think you'd prefer to reuse `platform=drm`; can we
> drop the meson hunks above and use `with_platform_drm` in
> src/vulkan/wsi/meson.build?

Thanks for the reminder; I got totally bound up in Jason's code fixes
and rebasing onto current master that I just forgot. I've got three tiny
patches on my drm-lease-v2 branch which do this (one for core, one for
anv, one for radv).

> About that hunk, have you looked at my patch?
> https://patchwork.freedesktop.org/patch/206818/

Yeah, that definitely looks nicer. I've reviewed it to make sure calling
the various finish functions is harmless when called before the matching
init function. If it gets applied before my series, I'll adapt :-)

-- 
-keith

signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa] vulkan/wsi: clean up cleanup path

2018-03-08 Thread Keith Packard

Eric Engestrom  writes:

> Cc: Keith Packard 
> Signed-off-by: Eric Engestrom 

Reviewed-by: Keith Packard 

-- 
-keith


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] egl/wayland: Add 10bpc BGR configs

2018-03-08 Thread Daniel Stone

Add support for XBGR2101010 and ABGR2101010.

Signed-off-by: Daniel Stone 
Cc: Ilia Mirkin 
---
 src/egl/drivers/dri2/platform_wayland.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index 877f7933b9a..7a32491974e 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -81,6 +81,18 @@ static const struct dri2_wl_visual {
  __DRI_IMAGE_FORMAT_ARGB2101010, 32,
  { 0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000 }
},
+   {
+ "XBGR2101010",
+ WL_DRM_FORMAT_XBGR2101010, WL_SHM_FORMAT_XBGR2101010,
+ __DRI_IMAGE_FORMAT_XBGR2101010, 32,
+ { 0x000003ff, 0x000ffc00, 0x3ff00000, 0x00000000 }
+   },
+   {
+ "ABGR2101010",
+ WL_DRM_FORMAT_ABGR2101010, WL_SHM_FORMAT_ABGR2101010,
+ __DRI_IMAGE_FORMAT_ABGR2101010, 32,
+ { 0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000 }
+   },
{
  "XRGB8888",
  WL_DRM_FORMAT_XRGB8888, WL_SHM_FORMAT_XRGB8888,
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] gbm: Add support for 10bpp BGR formats

2018-03-08 Thread Daniel Stone

Add support for XBGR2101010 and ABGR2101010 formats.

Signed-off-by: Daniel Stone 
---
 src/gbm/backends/dri/gbm_dri.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index df20db40218..b3d6ceb15a3 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -580,6 +580,14 @@ static const struct gbm_dri_visual gbm_dri_visuals_table[] 
= {
  GBM_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010,
  { 0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000 },
},
+   {
+ GBM_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XBGR2101010,
+ { 0x000003ff, 0x000ffc00, 0x3ff00000, 0x00000000 },
+   },
+   {
+ GBM_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ABGR2101010,
+ { 0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000 },
+   },
 };
 
 /* The two GBM_BO_FORMAT_[XA]RGB8888 formats alias the GBM_FORMAT_*
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] nouveau 30bpp / deep color status

2018-03-08 Thread Daniel Stone

Hi,

On 8 March 2018 at 17:08, Ilia Mirkin  wrote:
> On Thu, Mar 8, 2018 at 11:57 AM, Mario Kleiner
>  wrote:
>> Under EGL there is matching of channel masks, so only X11+GLX is
>> problematic. Not sure if anything special would need to be done for
>> XWayland, haven't looked at that at all so far. Or the modesetting ddx,
>> which currently assumes xrgb ordering for 10 bit.
>
> For the modesetting ddx, it has to switch to drmAddFB2 so that it
> knows the exact format. No other way around that, unfortunately. But
> that'll require work, and I'm happy enough that xf86-video-nouveau
> works (as that is what I recommend to anyone who'll listen).

modesetting now uses AddFB2, as of relatively recently.

>> There are some from Daniel which unify the handling of formats inside egl,
>> not with any abgr2101010 definitions though. Indeed on master compositing
>> doesn't work for depth 30 windows. I have some patches that fix this, and
>> some hack for EGL/x11 compositing that seems to work. Will send them out
>> soon.
>
> D'oh! Those patches were definitely there. I guess they got dropped at
> some point. Daniel, can you resend those?

Oops. Is this X11 or Wayland compositing? I'll resend those two, but
it would probably be better to hold off merging them until you can
verify I haven't done anything stupid.

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/56] anv/entrypoints: Generalize the string map a bit

On 7 March 2018 at 14:34, Jason Ekstrand  wrote:
> The original string map assumed that the mapping from strings to
> entrypoints was a bijection.  This will not be true the moment we
> add entrypoint aliasing.  This reworks things to be an arbitrary map
> from strings to non-negative signed integers.  The old one also had a
> potential bug if we ever had a hash collision because it didn't do the
> strcmp inside the lookup loop.  While we're at it, we break things out
> into a helpful class.
>
Instead of reworking it one can use a simple gcc/clang alias or
writing it out manually.
I'd imagine that the original code is faster, so there's little point
in using something else ;-)

[static] return_type
alias_name(args) __attribute__ ((alias(original_function)));

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: gl_program::info.system_values_read is a 64-bit-field

2018-03-08 Thread Marek Olšák

Reviewed-by: Marek Olšák 

Marek

On Thu, Mar 8, 2018 at 11:50 AM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> We were dropping the upper 32 bits, which caused assertion failures in
> some compute shader piglit tests with radeonsi since the commit below.
>
> Fixes: 752e96970303 ("compiler: Add two new system values for subgroups")
> Signed-off-by: Michel Dänzer 
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 
>  src/mesa/state_tracker/st_mesa_to_tgsi.c   | 6 +++---
>  2 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index ccf4dabcc9f..911c855d43a 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -6532,10 +6532,10 @@ st_translate_program(
> /* Declare misc input registers
>  */
> {
> -  GLbitfield sysInputs = proginfo->info.system_values_read;
> +  GLbitfield64 sysInputs = proginfo->info.system_values_read;
>
>for (i = 0; sysInputs; i++) {
> - if (sysInputs & (1 << i)) {
> + if (sysInputs & (1ull << i)) {
>  enum tgsi_semantic semName = _mesa_sysval_to_semantic(i);
>
>  t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
> @@ -6567,7 +6567,7 @@ st_translate_program(
> emit_wpos(st_context(ctx), t, proginfo, ureg,
>   program->wpos_transform_const);
>
> -sysInputs &= ~(1 << i);
> +sysInputs &= ~(1ull << i);
>   }
>}
> }
> @@ -6864,7 +6864,7 @@ get_mesa_program_tgsi(struct gl_context *ctx,
> /* This must be done before the uniform storage is associated. */
> if (shader->Stage == MESA_SHADER_FRAGMENT &&
> (prog->info.inputs_read & VARYING_BIT_POS ||
> -prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) {
> +prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD))) {
>static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
>   STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
>};
> diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c 
> b/src/mesa/state_tracker/st_mesa_to_tgsi.c
> index c76180a5799..99cddd66282 100644
> --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
> +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
> @@ -951,9 +951,9 @@ st_translate_mesa_program(struct gl_context *ctx,
>
> /* Declare misc input registers
>  */
> -   GLbitfield sysInputs = program->info.system_values_read;
> +   GLbitfield64 sysInputs = program->info.system_values_read;
> for (i = 0; sysInputs; i++) {
> -  if (sysInputs & (1 << i)) {
> +  if (sysInputs & (1ull << i)) {
>   unsigned semName = _mesa_sysval_to_semantic(i);
>
>   t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
> @@ -985,7 +985,7 @@ st_translate_mesa_program(struct gl_context *ctx,
>   semName == TGSI_SEMANTIC_POSITION)
>  emit_wpos(st_context(ctx), t, program, ureg);
>
> -  sysInputs &= ~(1 << i);
> +  sysInputs &= ~(1ull << i);
>}
> }
>
> --
> 2.16.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] egl: autotools: do not redirect stdin/stdout for wayland-scanner

for the series,
Reviewed-by: Dylan Baker 

Quoting Emil Velikov (2018-03-08 09:09:21)
> From: Emil Velikov 
> 
> The tool accepts the input and output files as arguments.
> There's no need for the redirection.
> 
> Signed-off-by: Emil Velikov 
> ---
>  src/egl/Makefile.am | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/egl/Makefile.am b/src/egl/Makefile.am
> index 9fcee9d2b84..285d3caa9ef 100644
> --- a/src/egl/Makefile.am
> +++ b/src/egl/Makefile.am
> @@ -70,11 +70,11 @@ WL_DMABUF_XML = 
> $(WAYLAND_PROTOCOLS_DATADIR)/unstable/linux-dmabuf/linux-dmabuf-
>  
>  drivers/dri2/linux-dmabuf-unstable-v1-protocol.c: $(WL_DMABUF_XML)
> $(MKDIR_GEN)
> -   $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@
> +   $(AM_V_GEN)$(WAYLAND_SCANNER) code $< $@
>  
>  drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h: $(WL_DMABUF_XML)
> $(MKDIR_GEN)
> -   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@
> +   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header $< $@
>  
>  if HAVE_PLATFORM_WAYLAND
>  drivers/dri2/linux-dmabuf-unstable-v1-protocol.lo: 
> drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h
> -- 
> 2.16.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

2018-03-08 Thread Lionel Landwerlin


On 08/03/18 17:00, Chris Wilson wrote:

Quoting Lionel Landwerlin (2018-03-08 15:42:53)

+static void
+read_gt_frequency(struct brw_context *brw,
+  struct brw_perf_query_object *obj)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
+  *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
+
+   switch (devinfo->gen) {
+   case 7:
+   case 8:
+  obj->oa.gt_frequency[0] =
+ ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
+  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;
+  obj->oa.gt_frequency[1] =
+ ((end_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
+  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;

I was just thinking this was the wrong frequency conversion for byt/bsw,
but then they don't have RPSTAT1 either. Is the OA only for big core?


I think you found a bug!

I can see in the internal documentation that CHV has RPSTAT1, except it 
doesn't have the fields we want.

So this needs to be dealt with/fixed.

On BXT I got sensible numbers from this register, so that leaves gen8 
based small cores in the unknown...

Documentation is again unhelpful...

Thanks,

-
Lionel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] wayland-drm: autotools: do not redirect stdin/stdout for wayland-scanner

From: Emil Velikov 

The tool accepts the input and output files as arguments.
There's no need for the redirection.

Signed-off-by: Emil Velikov 
---
 src/egl/wayland/wayland-drm/Makefile.am | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/egl/wayland/wayland-drm/Makefile.am 
b/src/egl/wayland/wayland-drm/Makefile.am
index 7d1db23f4b9..c10c4349d48 100644
--- a/src/egl/wayland/wayland-drm/Makefile.am
+++ b/src/egl/wayland/wayland-drm/Makefile.am
@@ -18,10 +18,10 @@ BUILT_SOURCES = wayland-drm-protocol.c \
 CLEANFILES = $(BUILT_SOURCES)
 
 %-protocol.c : %.xml
-   $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@
+   $(AM_V_GEN)$(WAYLAND_SCANNER) code $< $@
 
 %-server-protocol.h : %.xml
-   $(AM_V_GEN)$(WAYLAND_SCANNER) server-header < $< > $@
+   $(AM_V_GEN)$(WAYLAND_SCANNER) server-header $< $@
 
 %-client-protocol.h : %.xml
-   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@
+   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header $< $@
-- 
2.16.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] egl: autotools: do not redirect stdin/stdout for wayland-scanner

From: Emil Velikov 

The tool accepts the input and output files as arguments.
There's no need for the redirection.

Signed-off-by: Emil Velikov 
---
 src/egl/Makefile.am | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/egl/Makefile.am b/src/egl/Makefile.am
index 9fcee9d2b84..285d3caa9ef 100644
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -70,11 +70,11 @@ WL_DMABUF_XML = 
$(WAYLAND_PROTOCOLS_DATADIR)/unstable/linux-dmabuf/linux-dmabuf-
 
 drivers/dri2/linux-dmabuf-unstable-v1-protocol.c: $(WL_DMABUF_XML)
$(MKDIR_GEN)
-   $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@
+   $(AM_V_GEN)$(WAYLAND_SCANNER) code $< $@
 
 drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h: $(WL_DMABUF_XML)
$(MKDIR_GEN)
-   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@
+   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header $< $@
 
 if HAVE_PLATFORM_WAYLAND
 drivers/dri2/linux-dmabuf-unstable-v1-protocol.lo: 
drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h
-- 
2.16.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] vulkan: autotools: do not redirect stdin/stdout for wayland-scanner

From: Emil Velikov 

The tool accepts the input and output files as arguments.
There's no need for the redirection.

Signed-off-by: Emil Velikov 
---
 src/vulkan/Makefile.am | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am
index cbffd769fa4..e6d4277f92f 100644
--- a/src/vulkan/Makefile.am
+++ b/src/vulkan/Makefile.am
@@ -64,21 +64,21 @@ WL_DRM_XML = 
$(top_srcdir)/src/egl/wayland/wayland-drm/wayland-drm.xml
 
 wsi/wayland-drm-protocol.c : $(WL_DRM_XML)
$(MKDIR_GEN)
-   $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@
+   $(AM_V_GEN)$(WAYLAND_SCANNER) code $< $@
 
 wsi/wayland-drm-client-protocol.h : $(WL_DRM_XML)
$(MKDIR_GEN)
-   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@
+   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header $< $@
 
 WL_DMABUF_XML = 
$(WAYLAND_PROTOCOLS_DATADIR)/unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml
 
 wsi/linux-dmabuf-unstable-v1-protocol.c : $(WL_DMABUF_XML)
$(MKDIR_GEN)
-   $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@
+   $(AM_V_GEN)$(WAYLAND_SCANNER) code $< $@
 
 wsi/linux-dmabuf-unstable-v1-client-protocol.h : $(WL_DMABUF_XML)
$(MKDIR_GEN)
-   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@
+   $(AM_V_GEN)$(WAYLAND_SCANNER) client-header $< $@
 
 if HAVE_PLATFORM_WAYLAND
 wsi/linux-dmabuf-unstable-v1-protocol.lo: 
wsi/linux-dmabuf-unstable-v1-client-protocol.h
-- 
2.16.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/7] nir/spirv: propagate constants of GroupNonUniformQuad instructions and eliminate warning

---
 src/compiler/spirv/vtn_subgroup.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/compiler/spirv/vtn_subgroup.c 
b/src/compiler/spirv/vtn_subgroup.c
index bd3143962b..73420b7e43 100644
--- a/src/compiler/spirv/vtn_subgroup.c
+++ b/src/compiler/spirv/vtn_subgroup.c
@@ -261,7 +261,9 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
case SpvOpGroupNonUniformQuadBroadcast:
   vtn_build_subgroup_instr(b, nir_intrinsic_quad_broadcast,
val->ssa, vtn_ssa_value(b, w[4]),
-   vtn_ssa_value(b, w[5])->def, 0, 0);
+   vtn_ssa_value(b, w[5])->def,
+   vtn_constant_value(b, w[5])->values[0].u32[0],
+   0);
   break;
 
case SpvOpGroupNonUniformQuadSwap: {
@@ -277,9 +279,11 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
   case 2:
  op = nir_intrinsic_quad_swap_diagonal;
  break;
+  default:
+ vtn_fail("Invalid constant value in OpGroupNonUniformQuadSwap");
   }
   vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]),
-   NULL, 0, 0);
+   NULL, direction, 0);
   break;
}
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/7] ac: add LLVM build functions for subgroup instrinsics

Co-authored-by: Connor Abbott 
---
 src/amd/common/ac_llvm_build.c | 475 +
 src/amd/common/ac_llvm_build.h |  33 ++-
 2 files changed, 507 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 5341eb3f07..7c5bf6b801 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2350,3 +2350,478 @@ void ac_build_uif(struct ac_llvm_context *ctx, 
LLVMValueRef value,
  ctx->i32_0, "");
if_cond_emit(ctx, cond, label_id);
 }
+
+static LLVMValueRef
+_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
lane)
+{
+   ac_build_optimization_barrier(ctx, &src);
+   return ac_build_intrinsic(ctx,
+   lane == NULL ? "llvm.amdgcn.readfirstlane" : 
"llvm.amdgcn.readlane",
+   LLVMTypeOf(src), (LLVMValueRef []) {
+   src, lane },
+   lane == NULL ? 1 : 2,
+   AC_FUNC_ATTR_NOUNWIND |
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
+}
+
+/**
+ * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
+ * @param ctx
+ * @param src
+ * @param lane - id of the lane or NULL for the first active lane
+ * @return value of the lane
+ */
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
lane)
+{
+   LLVMTypeRef src_type = LLVMTypeOf(src);
+   src = ac_to_integer(ctx, src);
+   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+   LLVMValueRef ret;
+   if (bits == 32) {
+   ret = _ac_build_readlane(ctx, src, lane);
+   } else {
+   assert(bits % 32 == 0);
+   LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
+   LLVMValueRef src_vector =
+   LLVMBuildBitCast(ctx->builder, src, vec_type, "");
+   ret = LLVMGetUndef(vec_type);
+   for (unsigned i = 0; i < bits / 32; i++) {
+   src = LLVMBuildExtractElement(ctx->builder, src_vector,
+   LLVMConstInt(ctx->i32, i, 0), 
"");
+   LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, 
lane);
+   ret = LLVMBuildInsertElement(ctx->builder, ret, 
ret_comp,
+   LLVMConstInt(ctx->i32, i, 0), 
"");
+   }
+   }
+   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+}
+
+LLVMValueRef
+ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
value, LLVMValueRef lane)
+{
+   /* TODO: Use the actual instruction when LLVM adds an intrinsic for it.
+*/
+   LLVMValueRef pred = LLVMBuildICmp(ctx->builder, LLVMIntEQ, lane,
+ ac_get_thread_id(ctx), "");
+   return LLVMBuildSelect(ctx->builder, pred, value, src, "");
+}
+
+LLVMValueRef
+ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
+{
+   LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask,
+LLVMVectorType(ctx->i32, 2),
+"");
+   LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec,
+  ctx->i32_0, "");
+   LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec,
+  ctx->i32_1, "");
+   LLVMValueRef val =
+   ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
+  (LLVMValueRef []) { mask_lo, ctx->i32_0 },
+  2, AC_FUNC_ATTR_READNONE);
+   val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32,
+(LLVMValueRef []) { mask_hi, val },
+2, AC_FUNC_ATTR_READNONE);
+   return val;
+}
+
+enum dpp_ctrl {
+   _dpp_quad_perm = 0x000,
+   _dpp_row_sl = 0x100,
+   _dpp_row_sr = 0x110,
+   _dpp_row_rr = 0x120,
+   dpp_wf_sl1 = 0x130,
+   dpp_wf_rl1 = 0x134,
+   dpp_wf_sr1 = 0x138,
+   dpp_wf_rr1 = 0x13C,
+   dpp_row_mirror = 0x140,
+   dpp_row_half_mirror = 0x141,
+   dpp_row_bcast15 = 0x142,
+   dpp_row_bcast31 = 0x143
+};
+
+static inline enum dpp_ctrl
+dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
+{
+   assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
+   return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 
6);
+}
+
+static inline enum dpp_ctrl
+dpp_row_sl(unsigned amount)
+{
+   assert(amount > 0 && amount < 16);
+   return _dpp_row_sl | amount;
+}
+
+static inline enum dpp_ctrl
+dpp_row_sr(unsigned amount)
+{
+

[Mesa-dev] [PATCH 0/7] radv: add support for the new subgroup capabilities

This patch series implements the following SPIR-V capabilities for RADV:
GroupNonUniform
GroupNonUniformVote
GroupNonUniformArithmetic
GroupNonUniformBallot
GroupNonUniformShuffle
GroupNonUniformShuffleRelative
GroupNonUniformQuad

Not yet supported is the GroupNonUniformClustered capability due to a bug in 
the LLVM WWM liveness analysis.

Except for GroupNonUniformArithmetic, the capabilities should also work on 
pre-VI chip classes (untested).
To add support for GroupNonUniformArithmetic on pre-VI chip classes, inclusive and 
exclusive scans have to be implemented using the appropriate instructions.

Daniel Schürmann (7):
  nir: adjust subgroups instructions for 64bit ballot sizes
  nir/spirv: propagate constants of GroupNonUniformQuad instructions and
eliminate warning
  ac: lower 64bit subgroup intrinsics
  ac: make ballot and umsb capable of 64bit inputs
  ac: add LLVM build functions for subgroup intrinsics
  ac: handle subgroup intrinsics
  radv: enable subgroup capabilities

 src/amd/common/ac_llvm_build.c | 508 -
 src/amd/common/ac_llvm_build.h |  33 ++-
 src/amd/common/ac_lower_subgroups.c|  50 +++-
 src/amd/common/ac_nir_to_llvm.c|  66 +++--
 src/amd/vulkan/radv_device.c   |  12 +-
 src/amd/vulkan/radv_shader.c   |   6 +-
 src/compiler/nir/nir_lower_subgroups.c |   5 +-
 src/compiler/nir/nir_opcodes.py|  12 +-
 src/compiler/spirv/vtn_subgroup.c  |   8 +-
 9 files changed, 643 insertions(+), 57 deletions(-)

-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/7] ac: handle subgroup intrinsics

---
 src/amd/common/ac_nir_to_llvm.c | 66 +++--
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9b85069860..0f4cc32f15 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4363,36 +4363,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
break;
case nir_intrinsic_read_invocation:
-   case nir_intrinsic_read_first_invocation: {
-   LLVMValueRef args[2];
-
-   /* Value */
-   args[0] = get_src(ctx, instr->src[0]);
-
-   unsigned num_args;
-   const char *intr_name;
-   if (instr->intrinsic == nir_intrinsic_read_invocation) {
-   num_args = 2;
-   intr_name = "llvm.amdgcn.readlane";
-
-   /* Invocation */
-   args[1] = get_src(ctx, instr->src[1]);
-   } else {
-   num_args = 1;
-   intr_name = "llvm.amdgcn.readfirstlane";
-   }
-
-   /* We currently have no other way to prevent LLVM from lifting 
the icmp
-* calls to a dominating basic block.
-*/
-   ac_build_optimization_barrier(&ctx->ac, &args[0]);
-
-   result = ac_build_intrinsic(&ctx->ac, intr_name,
-   ctx->ac.i32, args, num_args,
-   AC_FUNC_ATTR_READNONE |
-   AC_FUNC_ATTR_CONVERGENT);
+   result = ac_build_readlane(&ctx->ac, get_src(ctx, 
instr->src[0]),
+   get_src(ctx, instr->src[1]));
+   break;
+   case nir_intrinsic_read_first_invocation:
+   result = ac_build_readlane(&ctx->ac, get_src(ctx, 
instr->src[0]), NULL);
break;
-   }
case nir_intrinsic_load_subgroup_invocation:
result = ac_get_thread_id(&ctx->ac);
break;
@@ -4646,6 +4622,38 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
break;
}
+   case nir_intrinsic_shuffle:
+   result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
+   get_src(ctx, instr->src[1]));
+   break;
+   case nir_intrinsic_reduce:
+   result = ac_build_reduce(&ctx->ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0],
+   instr->const_index[1]);
+   break;
+   case nir_intrinsic_inclusive_scan:
+   result = ac_build_inclusive_scan(&ctx->ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0]);
+   break;
+   case nir_intrinsic_exclusive_scan:
+   result = ac_build_exclusive_scan(&ctx->ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0]);
+   break;
+   case nir_intrinsic_quad_broadcast: {
+   unsigned lane = instr->const_index[0];
+   result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, 
instr->src[0]),
+   lane, lane, lane, lane);
+   break;
+   }
+   case nir_intrinsic_quad_swap_horizontal:
+   case nir_intrinsic_quad_swap_vertical:
+   case nir_intrinsic_quad_swap_diagonal:
+   result = ac_build_quad_swap(&ctx->ac, get_src(ctx, 
instr->src[0]),
+   instr->const_index[0]);
+   break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 7/7] radv: enable subgroup capabilities

---
 src/amd/vulkan/radv_device.c | 12 ++--
 src/amd/vulkan/radv_shader.c |  6 +-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 7a11e08f97..4200eb7d7d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -929,8 +929,16 @@ void radv_GetPhysicalDeviceProperties2(
(VkPhysicalDeviceSubgroupProperties*)ext;
properties->subgroupSize = 64;
properties->supportedStages = VK_SHADER_STAGE_ALL;
-   properties->supportedOperations = 
VK_SUBGROUP_FEATURE_BASIC_BIT;
-   properties->quadOperationsInAllStages = false;
+   properties->supportedOperations =
+   
VK_SUBGROUP_FEATURE_BASIC_BIT |
+   
VK_SUBGROUP_FEATURE_BALLOT_BIT |
+   
VK_SUBGROUP_FEATURE_QUAD_BIT |
+   
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
+   
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
+   
VK_SUBGROUP_FEATURE_VOTE_BIT |
+   (HAVE_LLVM >= 0x600 && 
pdevice->rad_info.chip_class >= VI ?
+   
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT : 0);
+   properties->quadOperationsInAllStages = true;
break;
}
case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 85672e600d..ae139b6dcd 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -212,7 +212,12 @@ radv_shader_compile_to_nir(struct radv_device *device,
.tessellation = true,
.int64 = true,
.multiview = true,
+   .subgroup_arithmetic = true,
+   .subgroup_ballot = true,
.subgroup_basic = true,
+   .subgroup_quad = true,
+   .subgroup_shuffle = true,
+   .subgroup_vote = true,
.variable_pointers = true,
},
.exts = {
@@ -278,7 +283,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
.lower_to_scalar = 1,
.lower_subgroup_masks = 1,
.lower_shuffle = 1,
-   .lower_quad =  1,
});
 
radv_optimize_nir(nir);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/7] nir: adjust subgroups instructions for 64bit ballot sizes

---
 src/compiler/nir/nir_lower_subgroups.c |  5 ++---
 src/compiler/nir/nir_opcodes.py| 12 ++--
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/compiler/nir/nir_lower_subgroups.c 
b/src/compiler/nir/nir_lower_subgroups.c
index f18ad00c37..ea883c353e 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -309,9 +309,8 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr 
*intrin,
   switch (intrin->intrinsic) {
   case nir_intrinsic_ballot_bitfield_extract:
  assert(intrin->src[1].is_ssa);
- return nir_i2b(b, nir_iand(b, nir_ushr(b, int_val,
-   intrin->src[1].ssa),
-   nir_imm_int(b, 1)));
+ return nir_i2b(b, nir_iand(b, nir_ushr(b, int_val, 
intrin->src[1].ssa),
+nir_imm_intN_t(b, 1, options->ballot_bit_size)));
   case nir_intrinsic_ballot_bit_count_reduce:
  return nir_bit_count(b, int_val);
   case nir_intrinsic_ballot_find_lsb:
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 65d1320062..08f4678fbd 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -305,17 +305,17 @@ dst = 0;
 for (unsigned bit = 0; bit < 32; bit++)
dst |= ((src0 >> bit) & 1) << (31 - bit);
 """)
-unop("bit_count", tuint32, """
+unop_convert("bit_count", tuint32, tuint, """
 dst = 0;
-for (unsigned bit = 0; bit < 32; bit++) {
+for (unsigned bit = 0; bit < bit_size; bit++) {
if ((src0 >> bit) & 1)
   dst++;
 }
 """)
 
-unop_convert("ufind_msb", tint32, tuint32, """
+unop_convert("ufind_msb", tint32, tuint, """
 dst = -1;
-for (int bit = 31; bit >= 0; bit--) {
+for (int bit = bit_size - 1; bit >= 0; bit--) {
if ((src0 >> bit) & 1) {
   dst = bit;
   break;
@@ -337,9 +337,9 @@ for (int bit = 31; bit >= 0; bit--) {
 }
 """)
 
-unop("find_lsb", tint32, """
+unop_convert("find_lsb", tint32, tint, """
 dst = -1;
-for (unsigned bit = 0; bit < 32; bit++) {
+for (unsigned bit = 0; bit < bit_size; bit++) {
if ((src0 >> bit) & 1) {
   dst = bit;
   break;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/7] ac: lower 64bit subgroup intrinsics

---
 src/amd/common/ac_lower_subgroups.c | 50 ++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_lower_subgroups.c 
b/src/amd/common/ac_lower_subgroups.c
index d0782b481b..2be48e2ba1 100644
--- a/src/amd/common/ac_lower_subgroups.c
+++ b/src/amd/common/ac_lower_subgroups.c
@@ -26,9 +26,45 @@
 
 #include "ac_nir_to_llvm.h"
 
+static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, 
nir_intrinsic_instr *intrin) {
+   assert(intrin->src[0].ssa->bit_size == 64);
+   nir_ssa_def * x = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
+   nir_ssa_def * y = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
+   nir_intrinsic_instr *intr_x = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_intrinsic_instr *intr_y = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_ssa_dest_init(&intr_x->instr, &intr_x->dest, 1, 32, NULL);
+   nir_ssa_dest_init(&intr_y->instr, &intr_y->dest, 1, 32, NULL);
+   intr_x->src[0] = nir_src_for_ssa(x);
+   intr_y->src[0] = nir_src_for_ssa(y);
+   intr_x->const_index[0] = intr_y->const_index[0] = 
intrin->const_index[0];
+   intr_x->const_index[1] = intr_y->const_index[1] = 
intrin->const_index[1];
+   if (intrin->intrinsic == nir_intrinsic_read_invocation ||
+   intrin->intrinsic == nir_intrinsic_shuffle ||
+   intrin->intrinsic == nir_intrinsic_quad_broadcast) {
+   nir_src_copy(&intr_x->src[1], &intrin->src[1], intr_x);
+   nir_src_copy(&intr_y->src[1], &intrin->src[1], intr_y);
+   }
+   intr_x->num_components = 1;
+   intr_y->num_components = 1;
+   nir_builder_instr_insert(b, &intr_x->instr);
+   nir_builder_instr_insert(b, &intr_y->instr);
+   return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);
+}
+
 static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, 
nir_intrinsic_instr *intrin)
 {
switch(intrin->intrinsic) {
+   case nir_intrinsic_read_invocation:
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_shuffle:
+   case nir_intrinsic_quad_broadcast:
+   case nir_intrinsic_quad_swap_horizontal:
+   case nir_intrinsic_quad_swap_vertical:
+   case nir_intrinsic_quad_swap_diagonal:
+   if (intrin->src[0].ssa->bit_size == 64)
+   return ac_lower_subgroups_64bit(b, intrin);
+   else
+   return NULL;
case nir_intrinsic_vote_ieq:
case nir_intrinsic_vote_feq: {
nir_intrinsic_instr *rfi =
@@ -37,12 +73,18 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder 
*b, nir_intrinsic_inst
  1, intrin->src[0].ssa->bit_size, NULL);
nir_src_copy(&rfi->src[0], &intrin->src[0], rfi);
rfi->num_components = 1;
-
+   nir_ssa_def *first_lane;
+   if (intrin->src[0].ssa->bit_size == 64) {
+   first_lane = ac_lower_subgroups_64bit(b, rfi);
+   } else {
+   nir_builder_instr_insert(b, &rfi->instr);
+   first_lane = &rfi->dest.ssa;
+   }
nir_ssa_def *is_ne;
if (intrin->intrinsic == nir_intrinsic_vote_feq)
-   is_ne = nir_fne(b, &rfi->dest.ssa, intrin->src[0].ssa);
+   is_ne = nir_fne(b, first_lane, intrin->src[0].ssa);
else
-   is_ne = nir_ine(b, &rfi->dest.ssa, intrin->src[0].ssa);
+   is_ne = nir_ine(b, first_lane, intrin->src[0].ssa);
 
nir_intrinsic_instr *ballot =
nir_intrinsic_instr_create(b->shader, 
nir_intrinsic_ballot);
@@ -50,7 +92,7 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, 
nir_intrinsic_inst
  1, 64, NULL);
ballot->src[0] = nir_src_for_ssa(is_ne);
ballot->num_components = 1;
-
+   nir_builder_instr_insert(b, &ballot->instr);
return nir_ieq(b, &ballot->dest.ssa, nir_imm_int64(b, 0));
}
default:
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/7] ac: make ballot and umsb capable of 64bit inputs

---
 src/amd/common/ac_llvm_build.c | 33 -
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 9851cafb7f..5341eb3f07 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -407,8 +407,7 @@ ac_build_ballot(struct ac_llvm_context *ctx,
 */
ac_build_optimization_barrier(ctx, &args[0]);
 
-   if (LLVMTypeOf(args[0]) != ctx->i32)
-   args[0] = LLVMBuildBitCast(ctx->builder, args[0], ctx->i32, "");
+   args[0] = ac_to_integer(ctx, args[0]);
 
return ac_build_intrinsic(ctx,
  "llvm.amdgcn.icmp.i32",
@@ -1267,23 +1266,39 @@ ac_build_umsb(struct ac_llvm_context *ctx,
  LLVMValueRef arg,
  LLVMTypeRef dst_type)
 {
-   LLVMValueRef args[2] = {
+   const char *intrin_name;
+   LLVMTypeRef type;
+   LLVMValueRef highest_bit;
+   LLVMValueRef zero;
+   if (ac_get_elem_bits(ctx, LLVMTypeOf(arg)) == 64) {
+   intrin_name = "llvm.ctlz.i64";
+   type = ctx->i64;
+   highest_bit = LLVMConstInt(ctx->i64, 63, false);
+   zero = ctx->i64_0;
+   } else {
+   intrin_name = "llvm.ctlz.i32";
+   type = ctx->i32;
+   highest_bit = LLVMConstInt(ctx->i32, 31, false);
+   zero = ctx->i32_0;
+   }
+
+   LLVMValueRef params[2] = {
arg,
ctx->i1true,
};
-   LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32",
- dst_type, args, ARRAY_SIZE(args),
+
+   LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type,
+ params, 2,
  AC_FUNC_ATTR_READNONE);
 
/* The HW returns the last bit index from MSB, but TGSI/NIR wants
 * the index from LSB. Invert it by doing "31 - msb". */
-   msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
-  msb, "");
+   msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
+   msb = LLVMBuildTruncOrBitCast(ctx->builder, msb, ctx->i32, "");
 
/* check for zero */
return LLVMBuildSelect(ctx->builder,
-  LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg,
-LLVMConstInt(ctx->i32, 0, 0), ""),
+  LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, 
zero, ""),
   LLVMConstInt(ctx->i32, -1, true), msb, "");
 }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/56] anv/entrypoints: Generalize the string map a bit

Quoting Ilia Mirkin (2018-03-08 08:59:12)
> On Thu, Mar 8, 2018 at 11:45 AM, Dylan Baker  wrote:
> > Quoting Jason Ekstrand (2018-03-07 20:22:51)
> >> Yes, that is what happened.  That said, wrote that patch in September and
> >> you've had about 6 months to look at it.  The only particularly active Mesa
> >> contributor who hasn't had access is Ilia.
> 
> [And there was no particular reason for me to review these as I am not
> involved in Intel development, and am fairly weak on Vulkan in the
> first place.]
> 
> > The understanding has always been that patches
> > that are neither trivial nor fix a critical bug should be on list at least 
> > 24
> > hours before being pushed, even after receiving a review.
> 
> I push stuff pretty much immediately after receiving any review. First
> I hear of a 24-hour rule. I even push stuff that never got review (but
> almost always sat on list in some form for a bit), but only inside of
> nouveau -- probably not OK for drivers with active multi-person teams.
> 
> For patches that substantially affect lots of drivers (either directly
> via code, or indirectly via being a shared component that sits in
> front of the drivers), it's good to ensure that all the driver
> maintainers are able to provide feedback, for which a 24 hour period
> ought to be sufficient. But for patches within a driver, I don't think
> that's particularly required.
> 
> Some patches did affect radv, and (iirc) Bas provided feedback on
> (some) of those (and I believe the two teams had been working together
> previously).
> 
> FWIW, I don't really perceive any process issue in this case.
> 
> It sucks when you notice an issue and the patch has already been
> pushed, but it happens. Not such a big deal.
> 
>   -ilia

I'll let it go then. When I was given commit access I was told that I should
wait 24 hours after sending patches unless they were trivial or fixed something
critical, ie, without them you can't compile or nothing works. I know we've
always given a lot of flexibility to vendor specific code (i965 or nouveau), but
you hope everyone can understand my frustration with a 56 patch series that I
sent review for 8 hours after it was posted to the list and I got told "Oh, I
merged that hours ago, patches welcome." 

But as I said, I'll let it go.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] nouveau 30bpp / deep color status

On Thu, Mar 8, 2018 at 11:57 AM, Mario Kleiner
 wrote:
> Cc'ing mesa-dev, which was left out.
>
>
> On 03/05/2018 01:40 PM, Ilia Mirkin wrote:
>>
>> On Mon, Mar 5, 2018 at 2:25 AM, Mario Kleiner
>>  wrote:
>>> Afaics EGL does the right thing wrt. channelmask matching of EGLConfigs
>>> to
>>> DRIconfigs, so we could probably implement dri_loader_get_cap(screen,
>>> DRI_LOADER_CAP_RGBA_ORDERING) == TRUE for the EGL loaders.
>>>
>>> But for GLX it is not so easy or quick. I looked if i could make the
>>> servers
>>> GLX send proper channelmask attributes and Mesa parsing them, but there
>>> aren't any GLX tags defined for channel masks, and all other tags come
>>> from
>>> official GLX extension headers. I'm not sure what the proper procedure
>>> for
>>> defining new tags is? Do we have to define a new GLX extension for that
>>> and
>>> get it in the Khronos registry and then back into the server/mesa
>>> code-base?
>>
>>
>> Can all of this be solved by a healthy dose of "don't do that"? i.e.
>> make sure that the DDX only ever exposes one of these at a time? And
>> also make the mesa driver only expose one as a DISPLAY_TARGET?
>>
>
> Yes, if "don't do that" is consistently possible on all future drivers.

I don't think it'd be undue burden for a driver to have to decide on
one ordering which is The Way To Do It (tm) for that hw, even if the
hw supports both. Could also drop some logic into the glx thing to
always pick a specific one in case both are supported, and hopefully
the DDX would have identical logic.

> Under EGL there is matching of channel masks, so only X11+GLX is
> problematic. Not sure if anything special would need to be done for
> XWayland, haven't looked at that at all so far. Or the modesetting ddx,
> which currently assumes xrgb ordering for 10 bit.

For the modesetting ddx, it has to switch to drmAddFB2 so that it
knows the exact format. No other way around that, unfortunately. But
that'll require work, and I'm happy enough that xf86-video-nouveau
works (as that is what I recommend to anyone who'll listen).

>
>>>
>>> The current patches in mesa for XBGR also lack enablement pieces for EGL,
>>> Wayland and X11 compositing, but that's a different problem.
>>
>>
>> EGL/drm and EGL/wayland should be enabled (look at Daniel Stone's
>> patches from a short while back, also upstream now). kmscube (with
>> some patches that are upstream now) and weston both run OK for me. I
>> think EGL/x11 is iffy though - haven't played with it.
>>
>>-ilia
>>
>
> There are some from Daniel which unify the handling of formats inside egl,
> not with any abgr2101010 definitions though. Indeed on master compositing
> doesn't work for depth 30 windows. I have some patches that fix this, and
> some hack for EGL/x11 compositing that seems to work. Will send them out
> soon.

D'oh! Those patches were definitely there. I guess they got dropped at
some point. Daniel, can you resend those?

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

2018-03-08 Thread Chris Wilson

Quoting Lionel Landwerlin (2018-03-08 15:42:53)
> +static void
> +read_gt_frequency(struct brw_context *brw,
> +  struct brw_perf_query_object *obj)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
> +  *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
> +
> +   switch (devinfo->gen) {
> +   case 7:
> +   case 8:
> +  obj->oa.gt_frequency[0] =
> + ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
> +  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;
> +  obj->oa.gt_frequency[1] =
> + ((end_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
> +  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;

I was just thinking this was the wrong frequency conversion for byt/bsw,
but then they don't have RPSTAT1 either. Is the OA only for big core?
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/56] anv/entrypoints: Generalize the string map a bit

On Thu, Mar 8, 2018 at 11:45 AM, Dylan Baker  wrote:
> Quoting Jason Ekstrand (2018-03-07 20:22:51)
>> Yes, that is what happened.  That said, wrote that patch in September and
>> you've had about 6 months to look at it.  The only particularly active Mesa
>> contributor who hasn't had access is Ilia.

[And there was no particular reason for me to review these as I am not
involved in Intel development, and am fairly weak on Vulkan in the
first place.]

> The understanding has always been that patches
> that are neither trivial nor fix a critical bug should be on list at least 24
> hours before being pushed, even after receiving a review.

I push stuff pretty much immediately after receiving any review. First
I hear of a 24-hour rule. I even push stuff that never got review (but
almost always sat on list in some form for a bit), but only inside of
nouveau -- probably not OK for drivers with active multi-person teams.

For patches that substantially affect lots of drivers (either directly
via code, or indirectly via being a shared component that sits in
front of the drivers), it's good to ensure that all the driver
maintainers are able to provide feedback, for which a 24 hour period
ought to be sufficient. But for patches within a driver, I don't think
that's particularly required.

Some patches did affect radv, and (iirc) Bas provided feedback on
(some) of those (and I believe the two teams had been working together
previously).

FWIW, I don't really perceive any process issue in this case.

It sucks when you notice an issue and the patch has already been
pushed, but it happens. Not such a big deal.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] nouveau 30bpp / deep color status

2018-03-08 Thread Mario Kleiner

Cc'ing mesa-dev, which was left out.

On 03/05/2018 01:40 PM, Ilia Mirkin wrote:

On Mon, Mar 5, 2018 at 2:25 AM, Mario Kleiner
wrote:

On 02/05/2018 12:50 AM, Ilia Mirkin wrote:

In case anyone's curious about 30bpp framebuffer support, here's the
current status:

Kernel:

Ben and I have switched the code to using a 256-based LUT for Kepler+,
and I've also written a patch to cause the addfb ioctl to use the
proper format. You can pick this up at:

https://github.com/skeggsb/linux/commits/linux-4.16 (note the branch!)
https://patchwork.freedesktop.org/patch/202322/

With these two, you should be able to use "X -depth 30" again on any
G80+ GPU to bring up a screen (as you could in kernel 4.9 and
earlier). However this still has some deficiencies, some of which I've
addressed:

xf86-video-nouveau:

DRI3 was broken, and Xv was broken. Patches available at:

https://github.com/imirkin/xf86-video-nouveau/commits/master

mesa:

The NVIDIA hardware (pre-Kepler) can only do XBGR scanout. Further the
nouveau KMS doesn't add XRGB scanout for Kepler+ (although it could).
Mesa was only enabled for XRGB, so I've piped XBGR through all the
same places:

https://github.com/imirkin/mesa/commits/30bpp

Wrt. mesa, those patches are now in master and i think we have a bit of a
problem under X11+GLX:

https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/state_trackers/dri/dri_screen.c#n108

dri_fill_in_modes() defines MESA_FORMAT_R10G10B10A2_UNORM,
MESA_FORMAT_R10G10B10X2_UNORM at the top inbetween the BGRX/A formats
ignoring the instructions that
"/* The 32-bit RGBA format must not precede the 32-bit BGRA format.
* Likewise for RGBX and BGRX. Otherwise, the GLX client and the GLX
* server may disagree on which format the GLXFBConfig represents,
* resulting in swapped color channels."

RGBA/X formats should only be exposed
if (dri_loader_get_cap(screen, DRI_LOADER_CAP_RGBA_ORDERING))

and that is only the case for the Android loader.

The GLX code doesn't use the red/green/blueChannelMasks for proper matching
of formats, and the server doesn't even transmit those masks to the client
in the case of GLX. So whatever 10 bit format comes first will win when
building the assignment to GLXFBConfigs.

I looked at the code and how it behaves. In practice Intel gfx works because
it's a classic DRI driver with its own method of building the DRIconfig's,
and it only exposes the BGR101010 formats, so no danger of mixups. AMD's
gallium drivers expose both BGR and RGB ordered 10 bit formats, but due to
the ordering, the matching ends up only assigning the desired BGR formats
that are good for AMD hw, discarding the RGB formats. nouveau works because
it only exposes the desired RGB format for the hw. But with other gallium
drivers for some SoC's or future gallium drivers it is not so clear if the
right thing will happen. E.g., freedreno seems to support both BGR and RGB
10 bit formats as PIPE_BIND_DISPLAY_TARGET afaics, so i don't know if by
luck the right thing would happen?

FWIW freedreno does not presently support 10bpc scanout.

Afaics EGL does the right thing wrt. channelmask matching of EGLConfigs to
DRIconfigs, so we could probably implement dri_loader_get_cap(screen,
DRI_LOADER_CAP_RGBA_ORDERING) == TRUE for the EGL loaders.

But for GLX it is not so easy or quick. I looked if i could make the servers
GLX send proper channelmask attributes and Mesa parsing them, but there
aren't any GLX tags defined for channel masks, and all other tags come from
official GLX extension headers. I'm not sure what the proper procedure for
defining new tags is? Do we have to define a new GLX extension for that and
get it in the Khronos registry and then back into the server/mesa code-base?

Can all of this be solved by a healthy dose of "don't do that"? i.e.
make sure that the DDX only ever exposes one of these at a time? And
also make the mesa driver only expose one as a DISPLAY_TARGET?

Yes, if "don't do that" is consistently possible on all future drivers.
Under EGL there is matching of channel masks, so only X11+GLX is
problematic. Not sure if anything special would need to be done for
XWayland, haven't looked at that at all so far. Or the modesetting ddx,
which currently assumes xrgb ordering for 10 bit.

The current patches in mesa for XBGR also lack enablement pieces for EGL,
Wayland and X11 compositing, but that's a different problem.

EGL/drm and EGL/wayland should be enabled (look at Daniel Stone's
patches from a short while back, also upstream now). kmscube (with
some patches that are upstream now) and weston both run OK for me. I
think EGL/x11 is iffy though - haven't played with it.

-ilia

There are some from Daniel which unify the handling of formats inside
egl, not with any abgr2101010 definitions though. Indeed on master
compositing doesn't work for depth 30 windows. I have some patches that
fix this, and some hack for EGL/x11 compositing that seems to work. Will

[Mesa-dev] [PATCH 3/5] winsys/amdgpu: pad compute rings

2018-03-08 Thread Marek Olšák

From: Marek Olšák 

v2: pad with PKT2 NOPs on SI
---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index d9a95c0..a3feeb9 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1521,29 +1521,31 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
   /* pad DMA ring to 8 DWs */
   if (ws->info.chip_class <= SI) {
  while (rcs->current.cdw & 7)
 radeon_emit(rcs, 0xf000); /* NOP packet */
   } else {
  while (rcs->current.cdw & 7)
 radeon_emit(rcs, 0x00000000); /* NOP packet */
   }
   break;
case RING_GFX:
+   case RING_COMPUTE:
   /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
   if (ws->info.gfx_ib_pad_with_type2) {
  while (rcs->current.cdw & 7)
 radeon_emit(rcs, 0x8000); /* type2 nop packet */
   } else {
  while (rcs->current.cdw & 7)
 radeon_emit(rcs, 0x1000); /* type3 nop packet */
   }
-  ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
+  if (cs->ring_type == RING_GFX)
+ ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
   break;
case RING_UVD:
case RING_UVD_ENC:
   while (rcs->current.cdw & 15)
  radeon_emit(rcs, 0x8000); /* type2 nop packet */
   break;
case RING_VCN_DEC:
   while (rcs->current.cdw & 15)
  radeon_emit(rcs, 0x81ff); /* nop packet */
   break;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 12/18] i965/blorp: Update the fast clear color address.

On Gen10, whenever we do a fast clear, blorp will update the clear color
state buffer for us, as long as we set the clear color address
correctly.

However, on a hiz clear, if the surface is already in the fast clear
state we skip the actual fast clear operation and, before gen10, only
updated the miptree. On gen10+ we need to update the clear value state
buffer too, since blorp will not be doing a fast clear and updating it
for us.

v4:
 - do not use clear_value_size in the for loop
 - Get the address of the clear color from the aux buffer or the
 clear_color_bo, depending on which one is available.
 - let core blorp update the clear color, but also update it when we
 skip a fast clear depth.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 11 +++
 src/mesa/drivers/dri/i965/brw_clear.c | 22 ++
 2 files changed, 33 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index ffd957fb866..914aeeace7a 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -185,6 +185,17 @@ blorp_surf_for_miptree(struct brw_context *brw,
 
   surf->aux_addr.buffer = aux_buf->bo;
   surf->aux_addr.offset = aux_buf->offset;
+
+  if (devinfo->gen >= 10) {
+ /* If we have a CCS surface and clear_color_bo set, use that bo as
+  * storage for the indirect clear color. Otherwise, use the extra
+  * space at the end of the aux_buffer.
+  */
+ surf->clear_color_addr = (struct blorp_address) {
+.buffer = aux_buf->clear_color_bo,
+.offset = aux_buf->clear_color_offset,
+ };
+  }
} else {
   surf->aux_addr = (struct blorp_address) {
  .buffer = NULL,
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index 8aa83722ee9..63c0b241898 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -108,6 +108,7 @@ brw_fast_clear_depth(struct gl_context *ctx)
struct intel_mipmap_tree *mt = depth_irb->mt;
struct gl_renderbuffer_attachment *depth_att = 
>Attachment[BUFFER_DEPTH];
const struct gen_device_info *devinfo = >screen->devinfo;
+   bool same_clear_value = true;
 
if (devinfo->gen < 6)
   return false;
@@ -213,6 +214,7 @@ brw_fast_clear_depth(struct gl_context *ctx)
   }
 
   intel_miptree_set_depth_clear_value(ctx, mt, clear_value);
+  same_clear_value = false;
}
 
bool need_clear = false;
@@ -232,6 +234,26 @@ brw_fast_clear_depth(struct gl_context *ctx)
* state then simply updating the miptree fast clear value is sufficient
* to change their clear value.
*/
+  if (devinfo->gen >= 10 && !same_clear_value) {
+ /* Before gen10, it was enough to just update the clear value in the
+  * miptree. But on gen10+, we let blorp update the clear value state
+  * buffer when doing a fast clear. Since we are skipping the fast
+  * clear here, we need to update the clear color ourselves.
+  */
+ uint32_t clear_offset = mt->hiz_buf->clear_color_offset;
+ union isl_color_value clear_color = { .f32 = { clear_value, } };
+
+ /* We can't update the clear color while the hardware is still using
+  * the previous one for a resolve or sampling from it. So make sure
+  * that there's no pending commands at this point.
+  */
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+ for (int i = 0; i < 4; i++) {
+brw_store_data_imm32(brw, mt->hiz_buf->clear_color_bo,
+ clear_offset + i * 4, clear_color.u32[i]);
+ }
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+  }
   return true;
}
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] st/mesa: gl_program::info.system_values_read is a 64-bit-field

2018-03-08 Thread Michel Dänzer

From: Michel Dänzer 

We were dropping the upper 32 bits, which caused assertion failures in
some compute shader piglit tests with radeonsi since the commit below.

Fixes: 752e96970303 ("compiler: Add two new system values for subgroups")
Signed-off-by: Michel Dänzer 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 
 src/mesa/state_tracker/st_mesa_to_tgsi.c   | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ccf4dabcc9f..911c855d43a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6532,10 +6532,10 @@ st_translate_program(
/* Declare misc input registers
 */
{
-  GLbitfield sysInputs = proginfo->info.system_values_read;
+  GLbitfield64 sysInputs = proginfo->info.system_values_read;
 
   for (i = 0; sysInputs; i++) {
- if (sysInputs & (1 << i)) {
+ if (sysInputs & (1ull << i)) {
 enum tgsi_semantic semName = _mesa_sysval_to_semantic(i);
 
 t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
@@ -6567,7 +6567,7 @@ st_translate_program(
emit_wpos(st_context(ctx), t, proginfo, ureg,
  program->wpos_transform_const);
 
-sysInputs &= ~(1 << i);
+sysInputs &= ~(1ull << i);
  }
   }
}
@@ -6864,7 +6864,7 @@ get_mesa_program_tgsi(struct gl_context *ctx,
/* This must be done before the uniform storage is associated. */
if (shader->Stage == MESA_SHADER_FRAGMENT &&
(prog->info.inputs_read & VARYING_BIT_POS ||
-prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) {
+prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD))) {
   static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
  STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
   };
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c 
b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index c76180a5799..99cddd66282 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -951,9 +951,9 @@ st_translate_mesa_program(struct gl_context *ctx,
 
/* Declare misc input registers
 */
-   GLbitfield sysInputs = program->info.system_values_read;
+   GLbitfield64 sysInputs = program->info.system_values_read;
for (i = 0; sysInputs; i++) {
-  if (sysInputs & (1 << i)) {
+  if (sysInputs & (1ull << i)) {
  unsigned semName = _mesa_sysval_to_semantic(i);
 
  t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
@@ -985,7 +985,7 @@ st_translate_mesa_program(struct gl_context *ctx,
  semName == TGSI_SEMANTIC_POSITION)
 emit_wpos(st_context(ctx), t, program, ureg);
 
-  sysInputs &= ~(1 << i);
+  sysInputs &= ~(1ull << i);
   }
}
 
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 18/18] intel: Remove use_clear_address flag from isl_surf_fill_state_info.

This flag was used while porting parts of the code to use the clear
color address, but other parts were not ported yet. So isl had to be
flexible enough to support both cases.

Now that the code exclusively uses the clear color address for
everything on Gen10+, we don't need it anymore.

Signed-off-by: Rafael Antognolli 
---
 src/intel/blorp/blorp_genX_exec.h|  4 
 src/intel/isl/isl.c  |  2 +-
 src/intel/isl/isl.h  |  7 +++
 src/intel/isl/isl_surface_state.c| 21 +++--
 src/intel/vulkan/anv_image.c |  1 -
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  7 ++-
 6 files changed, 13 insertions(+), 29 deletions(-)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index eef6ed8291a..29e426fbb9c 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -1314,15 +1314,11 @@ blorp_emit_surface_state(struct blorp_batch *batch,
  write_disable_mask |= ISL_CHANNEL_ALPHA_BIT;
}
 
-   const bool use_clear_address =
-  GEN_GEN >= 10 && (surface->clear_color_addr.buffer != NULL);
-
isl_surf_fill_state(batch->blorp->isl_dev, state,
.surf = , .view = >view,
.aux_surf = >aux_surf, .aux_usage = aux_usage,
.mocs = surface->addr.mocs,
.clear_color = surface->clear_color,
-   .use_clear_address = use_clear_address,
.write_disables = write_disable_mask);
 
blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset,
diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 3566bd3f0dd..d0cd7313bc6 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -79,7 +79,7 @@ isl_device_init(struct isl_device *dev,
  RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;
}
 
-   if (ISL_DEV_GEN(dev) <= 10) {
+   if (ISL_DEV_GEN(dev) < 10) {
   dev->ss.clear_value_size =
  isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index c50b78d4701..d65c621a732 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -1308,12 +1308,11 @@ struct isl_surf_fill_state_info {
union isl_color_value clear_color;
 
/**
-* Send only the clear value address
+* The address of the clear color state buffer
 *
-* If set, we only pass the clear address to the GPU and it will fetch it
-* from wherever it is.
+* On gen10+, we use an address to the indirect clear color, stored in a
+* state buffer.
 */
-   bool use_clear_address;
uint64_t clear_address;
 
/**
diff --git a/src/intel/isl/isl_surface_state.c 
b/src/intel/isl/isl_surface_state.c
index bff9693f02d..77931f25aa3 100644
--- a/src/intel/isl/isl_surface_state.c
+++ b/src/intel/isl/isl_surface_state.c
@@ -637,21 +637,14 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, 
void *state,
 #endif
 
if (info->aux_usage != ISL_AUX_USAGE_NONE) {
-  if (info->use_clear_address) {
 #if GEN_GEN >= 10
- s.ClearValueAddressEnable = true;
- s.ClearValueAddress = info->clear_address;
-#else
- unreachable("Gen9 and earlier do not support indirect clear colors");
-#endif
-  }
-#if GEN_GEN >= 9
-  if (!info->use_clear_address) {
- s.RedClearColor = info->clear_color.u32[0];
- s.GreenClearColor = info->clear_color.u32[1];
- s.BlueClearColor = info->clear_color.u32[2];
- s.AlphaClearColor = info->clear_color.u32[3];
-  }
+  s.ClearValueAddressEnable = true;
+  s.ClearValueAddress = info->clear_address;
+#elif GEN_GEN >= 9
+  s.RedClearColor = info->clear_color.u32[0];
+  s.GreenClearColor = info->clear_color.u32[1];
+  s.BlueClearColor = info->clear_color.u32[2];
+  s.AlphaClearColor = info->clear_color.u32[3];
 #elif GEN_GEN >= 7
   /* Prior to Sky Lake, we only have one bit for the clear color which
* gives us 0 or 1 in whatever the surface's format happens to be.
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index a941559eb3a..7f16b3dd5f2 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -1162,7 +1162,6 @@ anv_image_fill_surface_state(struct anv_device *device,
   .aux_usage = aux_usage,
   .aux_address = aux_address,
   .clear_address = clear_address.offset,
-  .use_clear_address = clear_address.bo != NULL,
   .mocs = device->default_mocs,
   .x_offset_sa = tile_x_sa,
   .y_offset_sa = tile_y_sa);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c

[Mesa-dev] [PATCH v4 11/18] i965/blorp: Add aux_buf variable to simplify code.

In a follow-up patch, we make use of clear_color_bo, which is in
mt->mcs_buf or mt->hiz_buf. To avoid duplicating more code that does the
same thing on both aux buffers, add an aux_buf variable that points to
whichever of the two is present and use it instead.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 19 +++
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 1d586e5ef38..ffd957fb866 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -155,10 +155,13 @@ blorp_surf_for_miptree(struct brw_context *brw,
};
 
struct isl_surf *aux_surf = NULL;
+   struct intel_miptree_aux_buffer *aux_buf = NULL;
if (mt->mcs_buf)
-  aux_surf = >mcs_buf->surf;
+  aux_buf = mt->mcs_buf;
else if (mt->hiz_buf)
-  aux_surf = >hiz_buf->surf;
+  aux_buf = mt->hiz_buf;
+
+   aux_surf = _buf->surf;
 
if (mt->format == MESA_FORMAT_S_UINT8 && is_render_target &&
devinfo->gen <= 7)
@@ -180,16 +183,8 @@ blorp_surf_for_miptree(struct brw_context *brw,
  .mocs = surf->addr.mocs,
   };
 
-  if (mt->mcs_buf) {
- surf->aux_addr.buffer = mt->mcs_buf->bo;
- surf->aux_addr.offset = mt->mcs_buf->offset;
-  } else {
- assert(mt->hiz_buf);
- assert(surf->aux_usage == ISL_AUX_USAGE_HIZ);
-
- surf->aux_addr.buffer = mt->hiz_buf->bo;
- surf->aux_addr.offset = mt->hiz_buf->offset;
-  }
+  surf->aux_addr.buffer = aux_buf->bo;
+  surf->aux_addr.offset = aux_buf->offset;
} else {
   surf->aux_addr = (struct blorp_address) {
  .buffer = NULL,
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 10/18] i965/miptree: Add new BO for clear color.

Add an extra BO to store clear color when we receive the aux buffer from
the window system. Since we have no control over the aux buffer size in
this case, we need the new BO to store only the clear color.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 22d0ae89367..a8b89d9170a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -969,6 +969,23 @@ create_ccs_buf_for_image(struct brw_context *brw,
   return false;
}
 
+   /* On gen10+ we start using an extra space in the aux buffer to store the
+* indirect clear color. However, if we imported an image from the window
+* system with CCS, we don't have the extra space at the end of the aux
+* buffer. So create a new bo here that will store that clear color.
+*/
+   const struct gen_device_info *devinfo = >screen->devinfo;
+   if (devinfo->gen >= 10) {
+  mt->mcs_buf->clear_color_bo =
+ brw_bo_alloc(brw->bufmgr, "clear_color_bo",
+  brw->isl_dev.ss.clear_color_state_size, 64);
+  if (!mt->mcs_buf->clear_color_bo) {
+ free(mt->mcs_buf);
+ mt->mcs_buf = NULL;
+ return false;
+  }
+   }
+
mt->mcs_buf->bo = image->bo;
brw_bo_reference(image->bo);
 
@@ -1211,6 +1228,7 @@ intel_miptree_aux_buffer_free(struct 
intel_miptree_aux_buffer *aux_buf)
   return;
 
brw_bo_unreference(aux_buf->bo);
+   brw_bo_unreference(aux_buf->clear_color_bo);
 
free(aux_buf);
 }
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 09/18] i965/miptree: Add space to store the clear value in the aux surface.

Similarly to vulkan where we store the clear value in the aux surface,
we can do the same in GL.

v2: Remove unneeded extra function.
v3: Use clear_value_state_size instead of clear_value_size.
v4:
 - rename to clear_color_state_size
 - store clear_color_bo and clear_color_offset in the aux buf struct

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 16 
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 16 
 2 files changed, 32 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index c6213b21629..22d0ae89367 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1678,6 +1678,17 @@ intel_alloc_aux_buffer(struct brw_context *brw,
   return false;
 
buf->size = aux_surf->size;
+
+   const struct gen_device_info *devinfo = >screen->devinfo;
+   if (devinfo->gen >= 10) {
+  /* On CNL, instead of setting the clear color in the SURFACE_STATE, we
+   * will set a pointer to a dword somewhere that contains the color. So,
+   * allocate the space for the clear color value here on the aux buffer.
+   */
+  buf->clear_color_offset = buf->size;
+  buf->size += brw->isl_dev.ss.clear_color_state_size;
+   }
+
buf->pitch = aux_surf->row_pitch;
buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);
 
@@ -1692,6 +1703,11 @@ intel_alloc_aux_buffer(struct brw_context *brw,
   return NULL;
}
 
+   if (devinfo->gen >= 10) {
+  buf->clear_color_bo = buf->bo;
+  brw_bo_reference(buf->clear_color_bo);
+   }
+
buf->surf = *aux_surf;
 
return buf;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 07c85807e80..54d36400757 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -180,6 +180,22 @@ struct intel_miptree_aux_buffer
 * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch
 */
uint32_t qpitch;
+
+   /**
+* Buffer object containing the indirect clear color.
+*
+* @see create_ccs_buf_for_image
+* @see RENDER_SURFACE_STATE.ClearValueAddress
+*/
+   struct brw_bo *clear_color_bo;
+
+   /**
+* Offset into bo where the clear color can be found.
+*
+* @see create_ccs_buf_for_image
+* @see RENDER_SURFACE_STATE.ClearValueAddress
+*/
+   uint32_t clear_color_offset;
 };
 
 struct intel_mipmap_tree
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 15/18] anv: Emit the fast clear color address, instead of value.

On Gen10+, instead of copying the clear color from the state buffer to
the surface state, just use the address of the state buffer in the
surface state directly. This way we can avoid the copy from state buffer
to surface state.

v4:
 - Remove use_clear_address from anv code. (Jason)
 - Use the helper to extract clear color from attachment (Jason)

Signed-off-by: Rafael Antognolli 
---
 src/intel/vulkan/anv_image.c   | 17 +
 src/intel/vulkan/anv_private.h |  5 
 src/intel/vulkan/genX_cmd_buffer.c | 52 +++---
 3 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index d9b5d266020..da4601ce20e 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -1059,6 +1059,13 @@ anv_image_fill_surface_state(struct anv_device *device,
const uint64_t aux_address = aux_usage == ISL_AUX_USAGE_NONE ?
   0 : (image->planes[plane].bo_offset + aux_surface->offset);
 
+   struct anv_address clear_address = { .bo = NULL };
+   state_inout->clear_address = 0;
+   if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE &&
+   aux_usage != ISL_AUX_USAGE_HIZ) {
+  clear_address = anv_image_get_clear_color_addr(device, image, aspect);
+   }
+
if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
!(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY) &&
!isl_has_matching_typed_storage_image_format(>info,
@@ -1076,6 +1083,7 @@ anv_image_fill_surface_state(struct anv_device *device,
 .mocs = device->default_mocs);
   state_inout->address = address,
   state_inout->aux_address = 0;
+  state_inout->clear_address = 0;
} else {
   if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
   !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY)) {
@@ -1149,6 +1157,8 @@ anv_image_fill_surface_state(struct anv_device *device,
   .aux_surf = _surface->isl,
   .aux_usage = aux_usage,
   .aux_address = aux_address,
+  .clear_address = clear_address.offset,
+  .use_clear_address = clear_address.bo != NULL,
   .mocs = device->default_mocs,
   .x_offset_sa = tile_x_sa,
   .y_offset_sa = tile_y_sa);
@@ -1163,6 +1173,13 @@ anv_image_fill_surface_state(struct anv_device *device,
   assert((aux_address & 0xfff) == 0);
   assert(aux_address == (*aux_addr_dw & 0xf000));
   state_inout->aux_address = *aux_addr_dw;
+
+  if (device->info.gen >= 10 && clear_address.bo) {
+ uint32_t *clear_addr_dw = state_inout->state.map +
+   device->isl_dev.ss.clear_color_state_offset;
+ assert((clear_address.offset & 0x3f) == 0);
+ state_inout->clear_address = *clear_addr_dw;
+  }
}
 
anv_state_flush(device, state_inout->state);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 497c61fab14..fefabd98fda 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1704,6 +1704,11 @@ struct anv_surface_state {
 * bits of this address include extra aux information.
 */
uint64_t aux_address;
+   /* Address of the clear color, if any
+*
+* This address is relative to the start of the BO.
+*/
+   uint64_t clear_address;
 };
 
 /**
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index aa995014144..ae4616eb451 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -200,6 +200,17 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
   if (result != VK_SUCCESS)
  anv_batch_set_error(_buffer->batch, result);
}
+
+   if (state.clear_address) {
+  VkResult result =
+ anv_reloc_list_add(_buffer->surface_relocs,
+_buffer->pool->alloc,
+state.state.offset +
+isl_dev->ss.clear_color_state_offset,
+image->planes[image_plane].bo, 
state.clear_address);
+  if (result != VK_SUCCESS)
+ anv_batch_set_error(_buffer->batch, result);
+   }
 }
 
 static void
@@ -1123,6 +1134,34 @@ transition_color_buffer(struct anv_cmd_buffer 
*cmd_buffer,
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
 }
 
+static void
+update_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
+const struct anv_attachment_state *att_state,
+const struct anv_image_view *iview)
+{
+   assert(GEN_GEN >= 10);
+   assert(iview->image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+
+   struct anv_address clear_address =
+  anv_image_get_clear_color_addr(cmd_buffer->device, iview->image,
+

[Mesa-dev] [PATCH v4 03/18] intel/genxml: Use a single field for clear color address on gen10.

genxml does not support having two address fields with different names
but same position in the state struct. Both "Clear Color Address"
and "Clear Depth Address Low" mean the same thing, only for different
surface types.

To work around this genxml limitation, rename "Clear Color Address"
to "Clear Value Address" and use it for both color and depth. Do the
same for the high bits.

TODO: add support for multiple addresses at the same position in the
xml.

v2: Combine high and low order bits into a single address field.

Signed-off-by: Rafael Antognolli 
Reviewed-by: Jason Ekstrand 
Reviewed-by: Jordan Justen 
---
 src/intel/genxml/gen10.xml | 7 +++
 src/intel/genxml/gen11.xml | 7 +++
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
index 2d36957c2a5..6302497a5e9 100644
--- a/src/intel/genxml/gen10.xml
+++ b/src/intel/genxml/gen10.xml
@@ -575,12 +575,11 @@
 
 
 
+
+
 
-
-
 
-
-
 
 
   
diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
index a93b62aa4cf..023b56f83f1 100644
--- a/src/intel/genxml/gen11.xml
+++ b/src/intel/genxml/gen11.xml
@@ -576,13 +576,12 @@
 
 
 
+
+
 
-
-
 
 
-
-
 
 
   
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 08/18] intel/blorp: Update clear color state buffer during fast clears.

We always want to update the fast clear color during a fast clear on
i965. On anv, we are doing that before a resolve, but by adding support to
blorp, we can do a similar thing and update it during a fast clear
instead.

The goal is to remove some code from anv that does such update, and
centralize everything in blorp, hopefully removing a lot of code
duplication. It also allows us to have a similar behavior on gen < 9 and
gen >= 10.

Signed-off-by: Rafael Antognolli 
---
 src/intel/blorp/blorp_genX_exec.h | 48 +++
 1 file changed, 48 insertions(+)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index c68767a2faa..eef6ed8291a 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -1642,6 +1642,51 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
 }
 #endif
 
+static void
+blorp_update_clear_color(struct blorp_batch *batch,
+ const struct brw_blorp_surface_info *info,
+ enum isl_aux_op op)
+{
+   if (info->clear_color_addr.buffer && op == ISL_AUX_OP_FAST_CLEAR) {
+#if GEN_GEN >= 9
+  for (int i = 0; i < 4; i++) {
+ blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
+sdi.Address = info->clear_color_addr;
+sdi.Address.offset += i * 4;
+sdi.ImmediateData = info->clear_color.u32[i];
+ }
+  }
+#elif GEN_GEN >= 7
+  blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
+ sdi.Address = info->clear_color_addr;
+ sdi.ImmediateData = ISL_CHANNEL_SELECT_RED   << 25 |
+ ISL_CHANNEL_SELECT_GREEN << 22 |
+ ISL_CHANNEL_SELECT_BLUE  << 19 |
+ ISL_CHANNEL_SELECT_ALPHA << 16;
+ if (isl_format_has_int_channel(info->view.format)) {
+for (unsigned i = 0; i < 4; i++) {
+   assert(info->clear_color.u32[i] == 0 ||
+  info->clear_color.u32[i] == 1);
+}
+sdi.ImmediateData |= (info->clear_color.u32[0] != 0) << 31;
+sdi.ImmediateData |= (info->clear_color.u32[1] != 0) << 30;
+sdi.ImmediateData |= (info->clear_color.u32[2] != 0) << 29;
+sdi.ImmediateData |= (info->clear_color.u32[3] != 0) << 28;
+ } else {
+for (unsigned i = 0; i < 4; i++) {
+   assert(info->clear_color.f32[i] == 0.0f ||
+  info->clear_color.f32[i] == 1.0f);
+}
+sdi.ImmediateData |= (info->clear_color.f32[0] != 0.0f) << 31;
+sdi.ImmediateData |= (info->clear_color.f32[1] != 0.0f) << 30;
+sdi.ImmediateData |= (info->clear_color.f32[2] != 0.0f) << 29;
+sdi.ImmediateData |= (info->clear_color.f32[3] != 0.0f) << 28;
+ }
+  }
+#endif
+   }
+}
+
 /**
  * \brief Execute a blit or render pass operation.
  *
@@ -1654,6 +1699,9 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
 static void
 blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
 {
+   blorp_update_clear_color(batch, >dst, params->fast_clear_op);
+   blorp_update_clear_color(batch, >depth, params->hiz_op);
+
 #if GEN_GEN >= 8
if (params->hiz_op != ISL_AUX_OP_NONE) {
   blorp_emit_gen8_hiz_op(batch, params);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 16/18] anv: Use clear address for HiZ fast clears too.

Store the default clear value for HiZ fast clears in a global bo, and
point to it (via the clear address) when needed.

Signed-off-by: Rafael Antognolli 
---
 src/intel/vulkan/anv_device.c  | 19 +++
 src/intel/vulkan/anv_image.c   | 10 +++---
 src/intel/vulkan/anv_private.h |  1 +
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index d8c4e986316..c636abea1c7 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1426,6 +1426,20 @@ vk_priority_to_gen(int priority)
}
 }
 
+static void
+anv_device_init_hiz_clear_batch(struct anv_device *device)
+{
+   anv_bo_init_new(>hiz_clear_bo, device, 4096);
+   uint32_t *map = anv_gem_mmap(device, device->hiz_clear_bo.gem_handle,
+0, 4096, 0);
+
+   union isl_color_value hiz_clear = { .u32 = { 0, } };
+   hiz_clear.f32[0] = ANV_HZ_FC_VAL;
+
+   memcpy(map, hiz_clear.u32, sizeof(hiz_clear.u32));
+   anv_gem_munmap(map, device->hiz_clear_bo.size);
+}
+
 VkResult anv_CreateDevice(
 VkPhysicalDevicephysicalDevice,
 const VkDeviceCreateInfo*   pCreateInfo,
@@ -1606,6 +1620,9 @@ VkResult anv_CreateDevice(
 
anv_device_init_trivial_batch(device);
 
+   if (device->info.gen >= 10)
+  anv_device_init_hiz_clear_batch(device);
+
anv_scratch_pool_init(device, >scratch_pool);
 
anv_queue_init(device, >queue);
@@ -1699,6 +1716,8 @@ void anv_DestroyDevice(
anv_gem_close(device, device->workaround_bo.gem_handle);
 
anv_gem_close(device, device->trivial_batch_bo.gem_handle);
+   if (device->info.gen >= 10)
+  anv_gem_close(device, device->hiz_clear_bo.gem_handle);
 
anv_state_pool_finish(>surface_state_pool);
anv_state_pool_finish(>instruction_state_pool);
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index da4601ce20e..a941559eb3a 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -1061,9 +1061,13 @@ anv_image_fill_surface_state(struct anv_device *device,
 
struct anv_address clear_address = { .bo = NULL };
state_inout->clear_address = 0;
-   if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE &&
-   aux_usage != ISL_AUX_USAGE_HIZ) {
-  clear_address = anv_image_get_clear_color_addr(device, image, aspect);
+
+   if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE) {
+  if (aux_usage == ISL_AUX_USAGE_HIZ) {
+ clear_address = (struct anv_address) { .bo = >hiz_clear_bo };
+  } else {
+ clear_address = anv_image_get_clear_color_addr(device, image, aspect);
+  }
}
 
if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index fefabd98fda..b7de7621250 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -895,6 +895,7 @@ struct anv_device {
 
 struct anv_bo   workaround_bo;
 struct anv_bo   trivial_batch_bo;
+struct anv_bo   hiz_clear_bo;
 
 struct anv_pipeline_cache   blorp_shader_cache;
 struct blorp_contextblorp;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 17/18] anv: Make blorp update the clear color.