Re: [Mesa-dev] [PATCH] radv: using tls to store llvm related info and speed up compiles (v9)

2018-07-11 Thread Michael Schellenberger Costa
Hi Dave,

-Ursprüngliche Nachricht-
Von: mesa-dev  Im Auftrag von Dave 
Airlie
Gesendet: Montag, 9. Juli 2018 23:15
An: mesa-dev@lists.freedesktop.org
Betreff: [Mesa-dev] [PATCH] radv: using tls to store llvm related info and 
speed up compiles (v9)

From: Dave Airlie 

This uses the common compiler passes abstraction to help radv
avoid fixed cost compiler overheads. This uses a linked list per
thread stored in thread local storage, with an entry in the list
for each target machine.

This should remove all the fixed overheads setup costs of creating
the pass manager each time.

This takes a demo app time to compile the radv meta shaders on nocache
and exit from 1.7s to 1s. It also has been reported to take the startup
time of uncached shaders on RoTR from 12m24s to 11m35s (Alex)

v2: fix llvm6 build, inline emit function, handle multiple targets
in one thread
v3: rebase and port onto new structure
v4: rename some vars (Bas)
v5: drag all code into radv for now, we can refactor it out later
for radeonsi if we make it shareable
v6: use a bit more C++ in the wrapper
v7: logic bugs fixed so it actually runs again.
v8: rebase on top of radeonsi changes.
v9: drop some C++ headers, cleanup list entry
v10: use pop_back (didn't have enough caffeine)
---
 src/amd/vulkan/Makefile.sources |   2 +
 src/amd/vulkan/meson.build  |   2 +
 src/amd/vulkan/radv_debug.h |   1 +
 src/amd/vulkan/radv_device.c|   1 +
 src/amd/vulkan/radv_llvm_helper.cpp | 140 
 src/amd/vulkan/radv_nir_to_llvm.c   |  27 +-
 src/amd/vulkan/radv_shader.c|  10 +-
 src/amd/vulkan/radv_shader_helper.h |  44 +
 8 files changed, 199 insertions(+), 28 deletions(-)
 create mode 100644 src/amd/vulkan/radv_llvm_helper.cpp
 create mode 100644 src/amd/vulkan/radv_shader_helper.h

diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources
index 70d56e88cb3..152fdd7cb71 100644
--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -54,6 +54,7 @@ VULKAN_FILES := \
radv_meta_resolve_cs.c \
radv_meta_resolve_fs.c \
radv_nir_to_llvm.c \
+   radv_llvm_helper.cpp \
radv_pass.c \
radv_pipeline.c \
radv_pipeline_cache.c \
@@ -62,6 +63,7 @@ VULKAN_FILES := \
radv_shader.c \
radv_shader_info.c \
radv_shader.h \
+   radv_shader_helper.h \
radv_query.c \
radv_util.c \
radv_util.h \
diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 22857926fa1..9f2842182e7 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -67,6 +67,7 @@ libradv_files = files(
   'radv_descriptor_set.h',
   'radv_formats.c',
   'radv_image.c',
+  'radv_llvm_helper.cpp',
   'radv_meta.c',
   'radv_meta.h',
   'radv_meta_blit.c',
@@ -88,6 +89,7 @@ libradv_files = files(
   'radv_radeon_winsys.h',
   'radv_shader.c',
   'radv_shader.h',
+  'radv_shader_helper.h',
   'radv_shader_info.c',
   'radv_query.c',
   'radv_util.c',
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index f1b0dc26a63..9fe4c3b7404 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -49,6 +49,7 @@ enum {
RADV_DEBUG_ERRORS= 0x8,
RADV_DEBUG_STARTUP   = 0x10,
RADV_DEBUG_CHECKIR   = 0x20,
+   RADV_DEBUG_NOTHREADLLVM  = 0x40,
 };
 
 enum {
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ad3465f594e..73c48cef1f0 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -436,6 +436,7 @@ static const struct debug_control radv_debug_options[] = {
{"errors", RADV_DEBUG_ERRORS},
{"startup", RADV_DEBUG_STARTUP},
{"checkir", RADV_DEBUG_CHECKIR},
+   {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
{NULL, 0}
 };
 
diff --git a/src/amd/vulkan/radv_llvm_helper.cpp 
b/src/amd/vulkan/radv_llvm_helper.cpp
new file mode 100644
index 000..ed05e1197ec
--- /dev/null
+++ b/src/amd/vulkan/radv_llvm_helper.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2018 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE 

Re: [Mesa-dev] [PATCH 6/7] ac: handle subgroup intrinsics

2018-03-08 Thread Michael Schellenberger Costa

HI Daniel,


Am 08.03.2018 um 18:10 schrieb Daniel Schürmann:

---
  src/amd/common/ac_nir_to_llvm.c | 66 +++--
  1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9b85069860..0f4cc32f15 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4363,36 +4363,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
result = ac_build_ballot(>ac, get_src(ctx, instr->src[0]));
break;
case nir_intrinsic_read_invocation:
-   case nir_intrinsic_read_first_invocation: {
-   LLVMValueRef args[2];
-
-   /* Value */
-   args[0] = get_src(ctx, instr->src[0]);
-
-   unsigned num_args;
-   const char *intr_name;
-   if (instr->intrinsic == nir_intrinsic_read_invocation) {
-   num_args = 2;
-   intr_name = "llvm.amdgcn.readlane";
-
-   /* Invocation */
-   args[1] = get_src(ctx, instr->src[1]);
-   } else {
-   num_args = 1;
-   intr_name = "llvm.amdgcn.readfirstlane";
-   }
-
-   /* We currently have no other way to prevent LLVM from lifting 
the icmp
-* calls to a dominating basic block.
-*/
-   ac_build_optimization_barrier(>ac, [0]);
-
-   result = ac_build_intrinsic(>ac, intr_name,
-   ctx->ac.i32, args, num_args,
-   AC_FUNC_ATTR_READNONE |
-   AC_FUNC_ATTR_CONVERGENT);
+   result = ac_build_readlane(>ac, get_src(ctx, 
instr->src[0]),
+   get_src(ctx, instr->src[1]));
+   break;
+   case nir_intrinsic_read_first_invocation:
+   result = ac_build_readlane(>ac, get_src(ctx, 
instr->src[0]), NULL);
break;
-   }
case nir_intrinsic_load_subgroup_invocation:
result = ac_get_thread_id(>ac);
break;
@@ -4646,6 +4622,38 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
break;
}
+   case nir_intrinsic_shuffle:
+   result = ac_build_shuffle(>ac, get_src(ctx, instr->src[0]),
+   get_src(ctx, instr->src[1]));
+   break;
+   case nir_intrinsic_reduce:
+   result = ac_build_reduce(>ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0],
Identation here and below is off. Took me a second look to see 
instr->const_index[0] is just the argument.

--Michael

+   instr->const_index[1]);
+   break;
+   case nir_intrinsic_inclusive_scan:
+   result = ac_build_inclusive_scan(>ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0]);
+   break;
+   case nir_intrinsic_exclusive_scan:
+   result = ac_build_exclusive_scan(>ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0]);
+   break;
+   case nir_intrinsic_quad_broadcast: {
+   unsigned lane = instr->const_index[0];
+   result = ac_build_quad_swizzle(>ac, get_src(ctx, 
instr->src[0]),
+   lane, lane, lane, lane);
+   break;
+   }
+   case nir_intrinsic_quad_swap_horizontal:
+   case nir_intrinsic_quad_swap_vertical:
+   case nir_intrinsic_quad_swap_diagonal:
+   result = ac_build_quad_swap(>ac, get_src(ctx, 
instr->src[0]),
+   instr->const_index[0]);
+   break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(>instr, stderr);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/7] ac: lower 64bit subgroup intrinsics

2018-03-08 Thread Michael Schellenberger Costa

Hi Daniel,


Am 08.03.2018 um 18:10 schrieb Daniel Schürmann:

---
  src/amd/common/ac_lower_subgroups.c | 50 ++---
  1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_lower_subgroups.c 
b/src/amd/common/ac_lower_subgroups.c
index d0782b481b..2be48e2ba1 100644
--- a/src/amd/common/ac_lower_subgroups.c
+++ b/src/amd/common/ac_lower_subgroups.c
@@ -26,9 +26,45 @@
  
  #include "ac_nir_to_llvm.h"
  
+static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, nir_intrinsic_instr *intrin) {

+   assert(intrin->src[0].ssa->bit_size == 64);
+   nir_ssa_def * x = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
+   nir_ssa_def * y = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

The extra space looks before x/y looks wrong.

+   nir_intrinsic_instr *intr_x = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_intrinsic_instr *intr_y = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_ssa_dest_init(_x->instr, _x->dest, 1, 32, NULL);
+   nir_ssa_dest_init(_y->instr, _y->dest, 1, 32, NULL);
+   intr_x->src[0] = nir_src_for_ssa(x);
+   intr_y->src[0] = nir_src_for_ssa(y);
+   intr_x->const_index[0] = intr_y->const_index[0] = 
intrin->const_index[0];
+   intr_x->const_index[1] = intr_y->const_index[1] = 
intrin->const_index[1];
+   if (intrin->intrinsic == nir_intrinsic_read_invocation ||
+   intrin->intrinsic == nir_intrinsic_shuffle ||
+   intrin->intrinsic == nir_intrinsic_quad_broadcast) {

Indentation is off for the other conditions.

+   nir_src_copy(_x->src[1], >src[1], intr_x);
+   nir_src_copy(_y->src[1], >src[1], intr_y);
+   }
+   intr_x->num_components = 1;
+   intr_y->num_components = 1;
+   nir_builder_instr_insert(b, _x->instr);
+   nir_builder_instr_insert(b, _y->instr);
+   return nir_pack_64_2x32_split(b, _x->dest.ssa, _y->dest.ssa);
+}


That said could you make a helper function:

static nir_intrinsic_instr 
*ac_lower_subgroups_64bit_split_intrinsic(nir_builder *b, nir_intrinsic_instr 
*intrin, unsigned int component) {
nir_ssa_def *comp;
if (component == 0)
comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
else
    comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
intr->src[0] = nir_src_for_ssa(comp);

intr->const_index[0] = intrin->const_index[0];
intr->const_index[1] = intrin->const_index[1];
if (intrin->intrinsic == nir_intrinsic_read_invocation ||
intrin->intrinsic == nir_intrinsic_shuffle ||
intrin->intrinsic == nir_intrinsic_quad_broadcast) {
nir_src_copy(>src[1], >src[1], intr);
}
intr->num_components = 1;
return intr;
}

And then simplify into:

static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, 
nir_intrinsic_instr *intrin) {
assert(intrin->src[0].ssa->bit_size == 64);
nir_intrinsic_instr *intr_x = 
ac_lower_subgroups_64bit_split_intrinsic(b, intrin, 0);
nir_intrinsic_instr *intr_y = 
ac_lower_subgroups_64bit_split_intrinsic(b, intrin, 1);

nir_builder_instr_insert(b, _x->instr);
nir_builder_instr_insert(b, _y->instr);
return nir_pack_64_2x32_split(b, _x->dest.ssa, _y->dest.ssa);
}

--Michael


+
  static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, 
nir_intrinsic_instr *intrin)
  {
switch(intrin->intrinsic) {
+   case nir_intrinsic_read_invocation:
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_shuffle:
+   case nir_intrinsic_quad_broadcast:
+   case nir_intrinsic_quad_swap_horizontal:
+   case nir_intrinsic_quad_swap_vertical:
+   case nir_intrinsic_quad_swap_diagonal:
+   if (intrin->src[0].ssa->bit_size == 64)
+   return ac_lower_subgroups_64bit(b, intrin);
+   else
+   return NULL;
case nir_intrinsic_vote_ieq:
case nir_intrinsic_vote_feq: {
nir_intrinsic_instr *rfi =
@@ -37,12 +73,18 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder 
*b, nir_intrinsic_inst
  1, intrin->src[0].ssa->bit_size, NULL);
nir_src_copy(>src[0], >src[0], rfi);
rfi->num_components = 1;
-
+   nir_ssa_def *first_lane;
+   if (intrin->src[0].ssa->bit_size == 64) {
+   first_lane = ac_lower_subgroups_64bit(b, rfi);
+   } else {
+   nir_builder_instr_insert(b, >instr);
+   first_lane = >dest.ssa;
+   }
nir_ssa_def *is_ne;
if (intrin->intrinsic == 

Re: [Mesa-dev] [PATCH] anv/cmd_buffer: Avoid unnecessary transitions before fast clears

2018-02-26 Thread Michael Schellenberger Costa
HI Jason,

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Jason Ekstrand
Gesendet: Montag, 26. Februar 2018 03:33
An: mesa-dev@lists.freedesktop.org
Cc: Jason Ekstrand 
Betreff: [Mesa-dev] [PATCH] anv/cmd_buffer: Avoid unnecessary transitions 
before fast clears

Previously, we would always apply the layout transition at the beginning
of the subpass and then do the clear whether fast or slow.  This meant
that there were some cases, specifically when the initial layout is
VK_IMAGE_LAYOUT_UNDEFINED, where we would end up doing a fast-clear or
ambiguate followed immediately by a fast-clear.  This isn't usually
terribly expensive, but it is a waste that we can avoid easily enough
now that we're doing everything at the same time in begin_subpass.

This improves the performance of the Sascha multisampling demo on my
laptop by around 10% by avoiding a duplicate MCS clear.
---
 src/intel/vulkan/genX_cmd_buffer.c | 158 -
 1 file changed, 101 insertions(+), 57 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 98e58ca..743c25c0 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3436,43 +3436,24 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer 
*cmd_buffer,
  target_layout = subpass->attachments[i].layout;
   }
 
-  if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
- assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
-
- uint32_t base_layer, layer_count;
- if (image->type == VK_IMAGE_TYPE_3D) {
-base_layer = 0;
-layer_count = anv_minify(iview->image->extent.depth,
- iview->planes[0].isl.base_level);
- } else {
-base_layer = iview->planes[0].isl.base_array_layer;
-layer_count = fb->layers;
- }
-
- transition_color_buffer(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
- iview->planes[0].isl.base_level, 1,
- base_layer, layer_count,
- att_state->current_layout, target_layout);
-  } else if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
- transition_depth_buffer(cmd_buffer, image,
- att_state->current_layout, target_layout);
- att_state->aux_usage =
-anv_layout_to_aux_usage(_buffer->device->info, image,
-VK_IMAGE_ASPECT_DEPTH_BIT, target_layout);
+  uint32_t base_layer, layer_count;
+  if (image->type == VK_IMAGE_TYPE_3D) {
+ base_layer = 0;
+ layer_count = anv_minify(iview->image->extent.depth,
+  iview->planes[0].isl.base_level);
+  } else {
+ base_layer = iview->planes[0].isl.base_array_layer;
+ layer_count = fb->layers;
   }
-  att_state->current_layout = target_layout;
-
-  if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
- assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
-
- /* Multi-planar images are not supported as attachments */
- assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(image->n_planes == 1);
 
- uint32_t base_clear_layer = iview->planes[0].isl.base_array_layer;
- uint32_t clear_layer_count = fb->layers;
+  /* Clears are based on the image view for 3D surfaces but transitions
+   * are done on an entire miplevel at a time.
+   */
+  uint32_t base_clear_layer = iview->planes[0].isl.base_array_layer;
+  uint32_t clear_layer_count = fb->layers;
 
- if (att_state->fast_clear) {
+  if (att_state->fast_clear) {
+ if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
 /* We only support fast-clears on the first layer */
 assert(iview->planes[0].isl.base_level == 0);
 assert(iview->planes[0].isl.base_array_layer == 0);
@@ -3484,9 +3465,17 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer 
*cmd_buffer,
anv_image_mcs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
 0, 1, ISL_AUX_OP_FAST_CLEAR, false);
 }
+
 base_clear_layer++;
 clear_layer_count--;
 
+/* Performing a fast clear takes care of all our transition needs
+ * for the first slice.  Increment the base layer and layer count
+ * so that later transitions don't touch layer 0.
+ */
+base_layer++;
+layer_count--;
Code and Comment do not match here. Given the original code do you mean 
Increment base_layer and decrement layer_count?
--Michael
+
 genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state,
  

Re: [Mesa-dev] [PATCH] ac/nir: fix user sgpr allocations on gfx9

2018-01-19 Thread Michael Schellenberger Costa

Hi Dave


Am 19.01.2018 um 00:22 schrieb Dave Airlie:

From: Dave Airlie 

The code to decide how to allocate sgprs didn't take into
account the merged shaders on gfx9.

This updates the code to count the correct number of requires
sgprs, which means we should be able to enable push const
inlining easier.

Signed-off-by: Dave Airlie 
---
  src/amd/common/ac_nir_to_llvm.c | 38 +++---
  1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 02a46dab4db..7560b61e99b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -565,8 +565,21 @@ static bool needs_view_index_sgpr(struct 
nir_to_llvm_context *ctx,
return false;
  }
  
+static int add_vertex_user_sgprs(const struct ac_shader_info *shader_info)

+{
+   int sgpr_count = 0;
+   sgpr_count += shader_info->vs.has_vertex_buffers ? 2 : 0;
+   if (shader_info->vs.needs_draw_id) {
+   sgpr_count += 3;
+   } else {
+   sgpr_count += 2;
+   }
+   return sgpr_count;
+}
+


Wouldn`t this be better written as:

static int add_vertex_user_sgprs(const struct ac_shader_info *shader_info)
{
int sgpr_count = shader_info->vs.has_vertex_buffers ? 2 : 0;
sgpr_count += shader_info->vs.needs_draw_id ? 3 : 2;
return sgpr_count;
}

--Michael


  static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
gl_shader_stage stage,
+   bool has_previous_stage, gl_shader_stage 
previous_stage,
bool needs_view_index,
struct user_sgpr_info *user_sgpr_info)
  {
@@ -589,7 +602,6 @@ static void allocate_user_sgprs(struct nir_to_llvm_context 
*ctx,
user_sgpr_info->sgpr_count += 2;
}
  
-	/* FIXME: fix the number of user sgprs for merged shaders on GFX9 */

switch (stage) {
case MESA_SHADER_COMPUTE:
if (ctx->shader_info->info.cs.uses_grid_size)
@@ -599,24 +611,28 @@ static void allocate_user_sgprs(struct 
nir_to_llvm_context *ctx,
user_sgpr_info->sgpr_count += 
ctx->shader_info->info.ps.needs_sample_positions;
break;
case MESA_SHADER_VERTEX:
-   if (!ctx->is_gs_copy_shader) {
-   user_sgpr_info->sgpr_count += 
ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
-   if (ctx->shader_info->info.vs.needs_draw_id) {
-   user_sgpr_info->sgpr_count += 3;
-   } else {
-   user_sgpr_info->sgpr_count += 2;
-   }
-   }
+   if (!ctx->is_gs_copy_shader)
+   user_sgpr_info->sgpr_count += 
add_vertex_user_sgprs(>shader_info->info);
if (ctx->options->key.vs.as_ls)
user_sgpr_info->sgpr_count++;
break;
case MESA_SHADER_TESS_CTRL:
-   user_sgpr_info->sgpr_count += 4;
+   if (has_previous_stage) {
+   user_sgpr_info->sgpr_count += 
add_vertex_user_sgprs(>shader_info->info);
+   user_sgpr_info->sgpr_count += 5;
+   } else
+   user_sgpr_info->sgpr_count += 4;
break;
case MESA_SHADER_TESS_EVAL:
user_sgpr_info->sgpr_count += 1;
break;
case MESA_SHADER_GEOMETRY:
+   if (has_previous_stage) {
+   if (previous_stage == MESA_SHADER_VERTEX)
+   user_sgpr_info->sgpr_count += 
add_vertex_user_sgprs(>shader_info->info);
+   else if (previous_stage == MESA_SHADER_TESS_EVAL)
+   user_sgpr_info->sgpr_count += 1;
+   }
user_sgpr_info->sgpr_count += 2;
break;
default:
@@ -798,7 +814,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
struct arg_info args = {};
LLVMValueRef desc_sets;
bool needs_view_index = needs_view_index_sgpr(ctx, stage);
-   allocate_user_sgprs(ctx, stage, needs_view_index, _sgpr_info);
+   allocate_user_sgprs(ctx, stage, has_previous_stage, stage, 
needs_view_index, _sgpr_info);
  
  	if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {

add_arg(, ARG_SGPR, const_array(ctx->ac.v4i32, 16),


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/8] spirv: Add basic type validation for OpLoad, OpStore, and OpCopyMemory

2017-12-08 Thread Michael Schellenberger Costa

Hi Jason,


Am 07.12.2017 um 17:12 schrieb Jason Ekstrand:

---
  src/compiler/spirv/vtn_variables.c | 18 ++
  1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index cf44ed3..8ce19ff 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1969,6 +1969,9 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_pointer);
struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_pointer);
  
+  vtn_fail_if(dest->type->deref != src->type->deref,

+  "Result and pointer types of OpLoad do not match");
This should be OpCopyMemory? On a more general side: As you want to 
cover every OpCode, why not overload vtn_fail_if() so that it takes the 
OpCode and then prepends it to the error message, e.g.:


vtn_fail_if(dest->type->deref != src->type->deref, opcode,"Result and pointer types 
of do not match");

Would extend to

"OpCodeMemory: Result and pointer types of do not match"

That way there is no chance to really mess op the opcodes.

All the best
Michael


+
vtn_variable_copy(b, dest->pointer, src->pointer);
break;
 }
@@ -1976,8 +1979,11 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
 case SpvOpLoad: {
struct vtn_type *res_type =
   vtn_value(b, w[1], vtn_value_type_type)->type;
-  struct vtn_pointer *src =
- vtn_value(b, w[3], vtn_value_type_pointer)->pointer;
+  struct vtn_value *src_val = vtn_value(b, w[3], vtn_value_type_pointer);
+  struct vtn_pointer *src = src_val->pointer;
+
+  vtn_fail_if(res_type != src_val->type->deref,
+  "Result and pointer types of OpLoad do not match");
  
if (src->mode == vtn_variable_mode_image ||

src->mode == vtn_variable_mode_sampler) {
@@ -1990,8 +1996,12 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
 }
  
 case SpvOpStore: {

-  struct vtn_pointer *dest =
- vtn_value(b, w[1], vtn_value_type_pointer)->pointer;
+  struct vtn_value *dest_val = vtn_value(b, w[1], vtn_value_type_pointer);
+  struct vtn_pointer *dest = dest_val->pointer;
+  struct vtn_value *src_val = vtn_untyped_value(b, w[2]);
+
+  vtn_fail_if(dest_val->type->deref != src_val->type,
+  "Value and pointer types of OpStore do not match");
  
if (glsl_type_is_sampler(dest->type->type)) {

   vtn_warn("OpStore of a sampler detected.  Doing on-the-fly copy "


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/28] vulkan/wsi: use function ptr definitions from the spec.

2017-11-17 Thread Michael Schellenberger Costa
Hi Jason,

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Jason Ekstrand
Gesendet: Donnerstag, 16. November 2017 22:29
An: mesa-dev@lists.freedesktop.org
Cc: Dave Airlie 
Betreff: [Mesa-dev] [PATCH 01/28] vulkan/wsi: use function ptr definitions from 
the spec.

From: Dave Airlie 

This just seems cleaner, and we may expand this in future.

Signed-off-by: Dave Airlie 
---
 src/amd/vulkan/radv_wsi.c   | 3 ++-
 src/intel/vulkan/anv_wsi.c  | 3 ++-
 src/vulkan/wsi/wsi_common.h | 6 +++---
 src/vulkan/wsi/wsi_common_wayland.c | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 64f5b0d..98346ca 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -29,8 +29,9 @@
 #include "vk_util.h"
 #include "util/macros.h"
 
+#define WSI_CB(x) .x = radv_##x
 MAYBE_UNUSED static const struct wsi_callbacks wsi_cbs = {
-   .get_phys_device_format_properties = radv_GetPhysicalDeviceFormatProperties,
+   WSI_CB(GetPhysicalDeviceFormatProperties),

The indentation is wrong here.
--Michael

 };
 
 VkResult
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 08d83cd..945b011 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -27,8 +27,9 @@
 #include "vk_util.h"
 
 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
+#define WSI_CB(x) .x = anv_##x
 static const struct wsi_callbacks wsi_cbs = {
-   .get_phys_device_format_properties = anv_GetPhysicalDeviceFormatProperties,
+   WSI_CB(GetPhysicalDeviceFormatProperties),
 };
 #endif
 
diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index 8166b7d..7be0182 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -118,11 +118,11 @@ struct wsi_device {
 struct wsi_interface *  wsi[VK_ICD_WSI_PLATFORM_MAX];
 };
 
+#define WSI_CB(cb) PFN_vk##cb cb
 struct wsi_callbacks {
-   void (*get_phys_device_format_properties)(VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkFormatProperties 
*pFormatProperties);
+   WSI_CB(GetPhysicalDeviceFormatProperties);
 };
+#undef WSI_CB
 
 #define WSI_DEFINE_NONDISP_HANDLE_CASTS(__wsi_type, __VkType)  \
\
diff --git a/src/vulkan/wsi/wsi_common_wayland.c 
b/src/vulkan/wsi/wsi_common_wayland.c
index 4c94cd6..b93c3d7 100644
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -84,7 +84,7 @@ wsi_wl_display_add_vk_format(struct wsi_wl_display *display, 
VkFormat format)
/* Don't add formats that aren't renderable. */
VkFormatProperties props;
 
-   
display->wsi_wl->cbs->get_phys_device_format_properties(display->wsi_wl->physical_device,
+   
display->wsi_wl->cbs->GetPhysicalDeviceFormatProperties(display->wsi_wl->physical_device,
format, );
if (!(props.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT))
   return;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] intel/reg: Add helpers for 64-bit integer immediates

2017-11-03 Thread Michael Schellenberger Costa
Hi Jason,

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Jason Ekstrand
Gesendet: Freitag, 3. November 2017 05:53
An: mesa-dev@lists.freedesktop.org
Cc: Jason Ekstrand 
Betreff: [Mesa-dev] [PATCH 1/4] intel/reg: Add helpers for 64-bit integer 
immediates

---
 src/intel/compiler/brw_reg.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h
index d68d64f..a641869 100644
--- a/src/intel/compiler/brw_reg.h
+++ b/src/intel/compiler/brw_reg.h
@@ -597,6 +597,24 @@ brw_imm_f(float f)
return imm;
 }
 
+/** Construct int64_t immediate register */
+static inline struct brw_reg
+brw_imm_q(int64_t q)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_Q);
+   imm.d64 = q;
+   return imm;
+}
+
+/** Construct int64_t immediate register */
This should be uint64_t?

--Michael

+static inline struct brw_reg
+brw_imm_uq(uint64_t uq)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ);
+   imm.u64 = uq;
+   return imm;
+}
+
 /** Construct integer immediate register */
 static inline struct brw_reg
 brw_imm_d(int d)
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] glsl: check if induction var incremented before use in terminator

2017-09-21 Thread Michael Schellenberger Costa

Out of curriosity what about decrement and other shenanigans?

--Michael


Am 21.09.2017 um 12:55 schrieb Timothy Arceri:

do-while loops can increment the starting value before the
condition is checked. e.g.

   do {
 ndx++;
   } while (ndx < 3);

This commit changes the code to detect this and reduces the
iteration count by 1 if found.

V2: fix terminator spelling

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Elie Tournier 
---
  src/compiler/glsl/loop_analysis.cpp | 38 +
  1 file changed, 38 insertions(+)

diff --git a/src/compiler/glsl/loop_analysis.cpp 
b/src/compiler/glsl/loop_analysis.cpp
index 81a07f78f8..78279844dc 100644
--- a/src/compiler/glsl/loop_analysis.cpp
+++ b/src/compiler/glsl/loop_analysis.cpp
@@ -164,20 +164,54 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, 
ir_rvalue *increment,
   iter_value += bias[i];
   valid_loop = true;
   break;
}
 }
  
 ralloc_free(mem_ctx);

 return (valid_loop) ? iter_value : -1;
  }
  
+static bool

+incremented_before_terminator(ir_loop *loop, ir_variable *var,
+  ir_if *terminator)
+{
+   for (exec_node *node = loop->body_instructions.get_head();
+!node->is_tail_sentinel();
+node = node->get_next()) {
+  ir_instruction *ir = (ir_instruction *) node;
+
+  switch (ir->ir_type) {
+  case ir_type_if:
+ if (ir->as_if() == terminator)
+return false;
+ break;
+
+  case ir_type_assignment: {
+ ir_assignment *assign = ir->as_assignment();
+ ir_variable *assignee = assign->lhs->whole_variable_referenced();
+
+ if (assignee == var) {
+assert(assign->condition == NULL);
+return true;
+ }
+
+ break;
+  }
+
+  default:
+ break;
+  }
+   }
+
+   unreachable("Unable to find induction variable");
+}
  
  /**

   * Record the fact that the given loop variable was referenced inside the 
loop.
   *
   * \arg in_assignee is true if the reference was on the LHS of an assignment.
   *
   * \arg in_conditional_code_or_nested_loop is true if the reference occurred
   * inside an if statement or a nested loop.
   *
   * \arg current_assignment is the ir_assignment node that the loop variable is
@@ -575,20 +609,24 @@ loop_analysis::visit_leave(ir_loop *ir)
  
  	 ir_variable *var = counter->variable_referenced();
  
  	 ir_rvalue *init = find_initial_value(ir, var);
  
   loop_variable *lv = ls->get(var);

   if (lv != NULL && lv->is_induction_var()) {
  t->iterations = calculate_iterations(init, limit, lv->increment,
   cmp);
  
+if (incremented_before_terminator(ir, var, t->ir)) {

+   t->iterations--;
+}
+
  if (t->iterations >= 0 &&
  (ls->limiting_terminator == NULL ||
   t->iterations < ls->limiting_terminator->iterations)) {
 ls->limiting_terminator = t;
  }
   }
   break;
}
  
default:


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/7] i965/fs/nir: Don't stomp 64-bit values to D in get_nir_src

2017-08-28 Thread Michael Schellenberger Costa

Hi Jason,


given that 16bit types are already on the horizon a

switch(nir_src_bit_size(src))


seems more future-proof

--Michael


Am 28.08.2017 um 16:51 schrieb Jason Ekstrand:

---
  src/intel/compiler/brw_fs_nir.cpp | 38 ++
  1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index c46998a..a882979 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1438,11 +1438,24 @@ fs_visitor::get_nir_src(const nir_src )
 src.reg.base_offset * src.reg.reg->num_components);
 }
  
-   /* to avoid floating-point denorm flushing problems, set the type by

-* default to D - instructions that need floating point semantics will set
-* this to F if they need to
-*/
-   return retype(reg, BRW_REGISTER_TYPE_D);
+   if (nir_src_bit_size(src) == 32) {
+  /* to avoid floating-point denorm flushing problems, set the type by
+   * default to D - instructions that need floating point semantics will
+   * set this to F if they need to
+   */
+  reg.type = BRW_REGISTER_TYPE_D;
+   } else {
+  /* We try to avoid dnorm issues for 64-bit too but we only have Q types
+   * on gen8+ so gen7 gets DF.
+   */
+  assert(nir_src_bit_size(src) == 64);
+  if (devinfo->gen >= 8)
+ reg.type = BRW_REGISTER_TYPE_Q;
+  else
+ reg.type = BRW_REGISTER_TYPE_DF;
+   }
+
+   return reg;
  }
  
  /**

@@ -1452,6 +1465,10 @@ fs_reg
  fs_visitor::get_nir_src_imm(const nir_src )
  {
 nir_const_value *val = nir_src_as_const_value(src);
+   /* This function shouldn't be called on anything which can even
+* possibly be 64 bits as it can't do what it claims.
+*/
+   assert(nir_src_bit_size(src) == 32);
 return val ? fs_reg(brw_imm_d(val->i32[0])) : get_nir_src(src);
  }
  
@@ -2643,8 +2660,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder ,

  */
 unsigned channel = iter * 2 + i;
 fs_reg dest = shuffle_64bit_data_for_32bit_write(bld,
-  retype(offset(value, bld, 2 * channel), 
BRW_REGISTER_TYPE_DF),
-  1);
+  offset(value, bld, channel), 1);
  
 srcs[header_regs + (i + first_component) * 2] = dest;

 srcs[header_regs + (i + first_component) * 2 + 1] =
@@ -3495,8 +3511,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder ,
if (nir_src_bit_size(instr->src[0]) == 64) {
   type_size = 8;
   val_reg = shuffle_64bit_data_for_32bit_write(bld,
-retype(val_reg, BRW_REGISTER_TYPE_DF),
-instr->num_components);
+val_reg, instr->num_components);
}
  
unsigned type_slots = type_size / 4;

@@ -3995,8 +4010,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
if (nir_src_bit_size(instr->src[0]) == 64) {
   type_size = 8;
   val_reg = shuffle_64bit_data_for_32bit_write(bld,
-retype(val_reg, BRW_REGISTER_TYPE_DF),
-instr->num_components);
+val_reg, instr->num_components);
}
  
unsigned type_slots = type_size / 4;

@@ -4053,7 +4067,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
unsigned first_component = nir_intrinsic_component(instr);
if (nir_src_bit_size(instr->src[0]) == 64) {
   fs_reg tmp = shuffle_64bit_data_for_32bit_write(bld,
-retype(src, BRW_REGISTER_TYPE_DF), num_components);
+src, num_components);
   src = tmp;
   num_components *= 2;
}


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: do not assert when reserving bindless slot 0

2017-08-23 Thread Michael Schellenberger Costa
Hi Samuel,

do you want to fully remove the assert or should this be something the kind of

MAYBE_UNUSED unsigned res = util_idalloc_alloc(>bindless_used_slots);
assert(res != 0);

--Michael

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Samuel Pitoiset
Gesendet: Mittwoch, 23. August 2017 09:43
An: mesa-dev@lists.freedesktop.org
Betreff: [Mesa-dev] [PATCH] radeonsi: do not assert when reserving bindless 
slot 0

When assertions were disabled, the compiler removed
the call to util_idalloc_alloc() and the first allocated
bindless slot was 0 which is invalid per the spec.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index f66ecc3e68..c53253ac8d 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2192,7 +2192,7 @@ static void si_init_bindless_descriptors(struct 
si_context *sctx,
util_idalloc_resize(>bindless_used_slots, num_elements);
 
/* Reserve slot 0 because it's an invalid handle for bindless. */
-   assert(!util_idalloc_alloc(>bindless_used_slots));
+   util_idalloc_alloc(>bindless_used_slots);
 }
 
 static void si_release_bindless_descriptors(struct si_context *sctx)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] intel/isl: Replace switch statements of doom with a macro

2017-08-21 Thread Michael Schellenberger Costa
Hi Jason,

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Jason Ekstrand
Gesendet: Donnerstag, 17. August 2017 23:57
An: mesa-dev@lists.freedesktop.org
Cc: Jason Ekstrand 
Betreff: [Mesa-dev] [PATCH 2/2] intel/isl: Replace switch statements of doom 
with a macro

---
 src/intel/isl/isl.c | 135 +++-
 1 file changed, 39 insertions(+), 96 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 1339867..3788f9c 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -1742,6 +1742,42 @@ isl_surf_get_ccs_surf(const struct isl_device *dev,
 .tiling_flags = ISL_TILING_CCS_BIT);
 }
 
+#define isl_genX_call(dev, func, ...)  \
+   switch (ISL_DEV_GEN(dev)) { \
+   case 4: \
+  /* G45 surface state is the same as gen5 */  \
+  if (ISL_DEV_IS_G4X(dev)) {   \
+ isl_gen5_##func(__VA_ARGS__); \
+  } else { \
+ isl_gen4_##func(__VA_ARGS__); \
+  }\
+  break;   \
+   case 5: \
+  isl_gen5_##func(__VA_ARGS__);\
+  break;   \
+   case 6: \
+  isl_gen6_##func(__VA_ARGS__);\
+  break;   \
+   case 7: \
+  if (ISL_DEV_IS_HASWELL(dev)) {   \
+ isl_gen75_##func(__VA_ARGS__);\
+  } else { \
+ isl_gen7_##func(__VA_ARGS__); \
+  }\
+  break;   \
+   case 8: \
+  isl_gen8_##func(__VA_ARGS__);\
+  break;   \
+   case 9: \
+  isl_gen9_##func(__VA_ARGS__);\
+  break;   \
+   case 10:\
+  isl_gen10_##func(__VA_ARGS__);   \
+  break;   \
+   default:\
+  assert(!"Unknown hardware generation");  \

In all the cases you replaced, the assert read

assert(!"Cannot fill surface state for this gen")

Maybe use that here? Also would unreachable be more suitable?
--Michael

+   }
+
 void
 isl_surf_fill_state_s(const struct isl_device *dev, void *state,
   const struct isl_surf_fill_state_info *restrict info)
@@ -1765,74 +1801,14 @@ isl_surf_fill_state_s(const struct isl_device *dev, 
void *state,
  info->surf->logical_level0_px.array_len);
}
 
-   switch (ISL_DEV_GEN(dev)) {
-   case 4:
-  if (ISL_DEV_IS_G4X(dev)) {
- /* G45 surface state is the same as gen5 */
- isl_gen5_surf_fill_state_s(dev, state, info);
-  } else {
- isl_gen4_surf_fill_state_s(dev, state, info);
-  }
-  break;
-   case 5:
-  isl_gen5_surf_fill_state_s(dev, state, info);
-  break;
-   case 6:
-  isl_gen6_surf_fill_state_s(dev, state, info);
-  break;
-   case 7:
-  if (ISL_DEV_IS_HASWELL(dev)) {
- isl_gen75_surf_fill_state_s(dev, state, info);
-  } else {
- isl_gen7_surf_fill_state_s(dev, state, info);
-  }
-  break;
-   case 8:
-  isl_gen8_surf_fill_state_s(dev, state, info);
-  break;
-   case 9:
-  isl_gen9_surf_fill_state_s(dev, state, info);
-  break;
-   case 10:
-  isl_gen10_surf_fill_state_s(dev, state, info);
-  break;
-   default:
-  assert(!"Cannot fill surface state for this gen");
-   }
+   isl_genX_call(dev, surf_fill_state_s, dev, state, info);
 }
 
 void
 isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
 const struct isl_buffer_fill_state_info *restrict info)
 {
-   switch (ISL_DEV_GEN(dev)) {
-   case 4:
-   case 5:
-  /* Gen 4-5 are all the same when it comes to buffer surfaces */
-  isl_gen5_buffer_fill_state_s(state, info);
-  break;
-   case 6:
-  isl_gen6_buffer_fill_state_s(state, info);
-  break;
-   case 7:
-  if (ISL_DEV_IS_HASWELL(dev)) {
- isl_gen75_buffer_fill_state_s(state, info);
-  } else {
- isl_gen7_buffer_fill_state_s(state, info);
-  }
-  break;
-   case 8:
-  isl_gen8_buffer_fill_state_s(state, info);
-  break;
-   case 9:
-  isl_gen9_buffer_fill_state_s(state, info);
-  break;
-   case 10:
-  isl_gen10_buffer_fill_state_s(state, info);
-  break;
-   default:
-  

Re: [Mesa-dev] [PATCH 1/5] i965: Enable non-CCS_E fast-clears on gen9+

2017-06-15 Thread Michael Schellenberger Costa
Hi Jason


On 15.06.2017 03:54, Jason Ekstrand wrote:
> Sky Lake and above can fast-clear exactly the same set of formats as
> older hardware.  The only restriction is that you can't *texture* from
> it unless the format supports CCS_E but you can fast-clear and render to
> it just fine.  All of the code exists and now that we have sane resolves,
> we can trivially turn it on.
>
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 18 --
>  1 file changed, 4 insertions(+), 14 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 02e74ca..c19d2d5 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -207,13 +207,7 @@ intel_miptree_supports_non_msrt_fast_clear(struct 
> brw_context *brw,
> if (!brw->format_supported_as_render_target[mt->format])
>return false;
Can you just do

return brw->format_supported_as_render_target[mt->format];

now the second clause is gone.
Michael

>  
> -   if (brw->gen >= 9) {
> -  mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
> -  const enum isl_format isl_format =
> - brw_isl_format_for_mesa_format(linear_format);
> -  return isl_format_supports_ccs_e(>screen->devinfo, isl_format);
> -   } else
> -  return true;
> +   return true;
>  }
>  
>  /* On Gen9 support for color buffer compression was extended to single
> @@ -257,16 +251,12 @@ intel_miptree_supports_lossless_compressed(struct 
> brw_context *brw,
> if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
>return false;
>  
> -   /* Fast clear mechanism and lossless compression go hand in hand. */
> +   /* Fast clear support is a pre-requisite for lossless compression */
> if (!intel_miptree_supports_non_msrt_fast_clear(brw, mt))
>return false;
>  
> -   /* Fast clear can be also used to clear srgb surfaces by using equivalent
> -* linear format. This trick, however, can't be extended to be used with
> -* lossless compression and therefore a check is needed to see if the 
> format
> -* really is linear.
> -*/
> -   return _mesa_get_srgb_format_linear(mt->format) == mt->format;
> +   enum isl_format isl_format = brw_isl_format_for_mesa_format(mt->format);
> +   return isl_format_supports_ccs_e(>screen->devinfo, isl_format);
>  }
>  
>  /**

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: create locked version of HashWalk

2017-04-24 Thread Michael Schellenberger Costa
Him Tim,

I hadnt had my morning coffee butdidnt you do it the other way around?

It looks like, that "_mesa_HashWalk" ist he locked and "_mesa_HashWalkLocked" 
the unlocked version.

Michael

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Timothy Arceri
Gesendet: Montag, 24. April 2017 07:59
An: mesa-dev@lists.freedesktop.org
Cc: Timothy Arceri 
Betreff: [Mesa-dev] [PATCH 1/2] mesa: create locked version of HashWalk

---
 src/mesa/main/hash.c | 34 ++
 src/mesa/main/hash.h |  5 +
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c index b7a7bd9..a3772bd 
100644
--- a/src/mesa/main/hash.c
+++ b/src/mesa/main/hash.c
@@ -404,40 +404,58 @@ _mesa_HashDeleteAll(struct _mesa_HashTable *table,  }
 
 
 /**
  * Walk over all entries in a hash table, calling callback function for each.
  * \param table  the hash table to walk
  * \param callback  the callback function
  * \param userData  arbitrary pointer to pass along to the callback
  *  (this is typically a struct gl_context pointer)
  */
+static void
+hash_walk_unlocked(const struct _mesa_HashTable *table,
+   void (*callback)(GLuint key, void *data, void *userData),
+   void *userData)
+{
+   assert(table);
+   assert(callback);
+
+   struct hash_entry *entry;
+   hash_table_foreach(table->ht, entry) {
+  callback((uintptr_t)entry->key, entry->data, userData);
+   }
+   if (table->deleted_key_data)
+  callback(DELETED_KEY_VALUE, table->deleted_key_data, userData); }
+
+
 void
 _mesa_HashWalk(const struct _mesa_HashTable *table,
void (*callback)(GLuint key, void *data, void *userData),
void *userData)
 {
/* cast-away const */
struct _mesa_HashTable *table2 = (struct _mesa_HashTable *) table;
-   struct hash_entry *entry;
 
-   assert(table);
-   assert(callback);
mtx_lock(>Mutex);
-   hash_table_foreach(table->ht, entry) {
-  callback((uintptr_t)entry->key, entry->data, userData);
-   }
-   if (table->deleted_key_data)
-  callback(DELETED_KEY_VALUE, table->deleted_key_data, userData);
+   hash_walk_unlocked(table, callback, userData);
mtx_unlock(>Mutex);
 }
 
+void
+_mesa_HashWalkLocked(const struct _mesa_HashTable *table,
+   void (*callback)(GLuint key, void *data, void *userData),
+   void *userData)
+{
+   hash_walk_unlocked(table, callback, userData); }
+
 static void
 debug_print_entry(GLuint key, void *data, void *userData)  {
_mesa_debug(NULL, "%u %p\n", key, data);  }
 
 /**
  * Dump contents of hash table for debugging.
  *
  * \param table the hash table.
diff --git a/src/mesa/main/hash.h b/src/mesa/main/hash.h index 52a6c5d..9eb0f0e 
100644
--- a/src/mesa/main/hash.h
+++ b/src/mesa/main/hash.h
@@ -59,20 +59,25 @@ extern void _mesa_HashRemoveLocked(struct _mesa_HashTable 
*table, GLuint key);  extern void  _mesa_HashDeleteAll(struct _mesa_HashTable 
*table,
 void (*callback)(GLuint key, void *data, void *userData),
 void *userData);
 
 extern void
 _mesa_HashWalk(const struct _mesa_HashTable *table,
void (*callback)(GLuint key, void *data, void *userData),
void *userData);
 
+extern void
+_mesa_HashWalkLocked(const struct _mesa_HashTable *table,
+ void (*callback)(GLuint key, void *data, void *userData),
+ void *userData);
+
 extern void _mesa_HashPrint(const struct _mesa_HashTable *table);
 
 extern GLuint _mesa_HashFindFreeKeyBlock(struct _mesa_HashTable *table, GLuint 
numKeys);
 
 extern GLuint
 _mesa_HashNumEntries(const struct _mesa_HashTable *table);
 
 extern void _mesa_test_hash_functions(void);
 
 
--
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] radv: Implement pipeline statistics queries.

2017-04-11 Thread Michael Schellenberger Costa

Hi Bas,

it seems like this junk

+   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_store_ssbo);
+   store->src[0] = nir_src_for_ssa(available);
+   store->src[1] = nir_src_for_ssa(_buf->dest.ssa);
+   store->src[2] = nir_src_for_ssa(nir_iadd(, output_base, nir_imul(, 
elem_count, elem_size)));
+   nir_intrinsic_set_write_mask(store, 0x1);
+   store->num_components = 1;
+   nir_builder_instr_insert(, >instr);


would make a great helper function, as it is repeated 5 times in the 
code and only the input for src[0] and src[2] changes.


Similarly you could simplify those longer sequences

+   /* Store the availability bit if requested. */
+   nir_if *availability_if = nir_if_create(b.shader);
+   availability_if->condition = nir_src_for_ssa(nir_iand(, flags, 
nir_imm_int(, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)));
+   nir_cf_node_insert(b.cursor, _if->cf_node);
+
+   b.cursor = nir_after_cf_list(_if->then_list);
+
+   nir_store_for_ssbo(store, available, )nir_intrinsic_instr *store = 
nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
+   store->src[0] = nir_src_for_ssa(available);
+   store->src[1] = nir_src_for_ssa(_buf->dest.ssa);
+   store->src[2] = nir_src_for_ssa(nir_iadd(, output_base, nir_imul(, 
elem_count, elem_size)));
+   nir_intrinsic_set_write_mask(store, 0x1);
+   store->num_components = 1;
+   nir_builder_instr_insert(, >instr);
+
+   b.cursor = nir_after_cf_node(_if->cf_node);

which appear twice

All the best
Michael

Am 11.04.2017 um 02:04 schrieb Bas Nieuwenhuizen:

The devil is in the shader again, otherwise this is
fairly straightforward.

The CTS contains no pipeline statistics copy to buffer
testcases, so I did a basic smoketest.

Signed-off-by: Bas Nieuwenhuizen 
---
  src/amd/vulkan/radv_device.c  |   2 +-
  src/amd/vulkan/radv_private.h |   2 +
  src/amd/vulkan/radv_query.c   | 414 +++---
  3 files changed, 392 insertions(+), 26 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 9e8faa3da9a..5f14394196a 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -483,7 +483,7 @@ void radv_GetPhysicalDeviceFeatures(
.textureCompressionASTC_LDR   = false,
.textureCompressionBC = true,
.occlusionQueryPrecise= true,
-   .pipelineStatisticsQuery  = false,
+   .pipelineStatisticsQuery  = true,
.vertexPipelineStoresAndAtomics   = true,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize   = true,
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b54a2537c8a..2cb8cdd8d84 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -443,6 +443,7 @@ struct radv_meta_state {
VkDescriptorSetLayout ds_layout;
VkPipelineLayout p_layout;
VkPipeline occlusion_query_pipeline;
+   VkPipeline pipeline_statistics_query_pipeline;
} query;
  };
  
@@ -1379,6 +1380,7 @@ struct radv_query_pool {

uint32_t availability_offset;
char *ptr;
VkQueryType type;
+   uint32_t pipeline_stats_mask;
  };
  
  VkResult

diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index dc1844adb51..2de484224bc 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -35,6 +35,9 @@
  #include "radv_cs.h"
  #include "sid.h"
  
+

+static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 
8, 9, 10};
+
  static unsigned get_max_db(struct radv_device *device)
  {
unsigned num_db = device->physical_device->rad_info.num_render_backends;
@@ -269,14 +272,259 @@ build_occlusion_query_shader(struct radv_device *device) 
{
return b.shader;
  }
  
+static nir_shader *

+build_pipeline_statistics_query_shader(struct radv_device *device) {
+   /* the shader this builds is roughly
+*
+* push constants {
+*  uint32_t flags;
+*  uint32_t dst_stride;
+*  uint32_t stats_mask;
+*  uint32_t avail_offset;
+* };
+*
+* uint32_t src_stride = 11 * 16;
+*
+* location(binding = 0) buffer dst_buf;
+* location(binding = 1) buffer src_buf;
+*
+* void main() {
+*  uint64_t src_offset = src_stride * global_id.x;
+*  uint64_t dst_base = dst_stride * global_id.x;
+*  uint64_t dst_offset = dst_base;
+*  uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+*  uint32_t elem_count = stats_mask >> 16;
+*  uint32_t available = src_buf[avail_offset + 

Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling. (v2)

2017-02-26 Thread Michael Schellenberger Costa
Hi Dave,

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Dave Airlie
Gesendet: Freitag, 24. Februar 2017 04:59
An: mesa-dev@lists.freedesktop.org
Betreff: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling. (v2)

From: Dave Airlie 

This enables LLVM loop unrolling.

v2: limit unroll count to 32, don't fully unroll. (arsenm)

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_llvm_helper.cpp | 22 ++
 src/amd/common/ac_llvm_util.h |  1 +
 src/amd/common/ac_nir_to_llvm.c   | 26 --
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_helper.cpp 
b/src/amd/common/ac_llvm_helper.cpp
index 594339e..85b0cbf 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -36,7 +36,9 @@
 #include 
 #include 
 #include 
+#include 
 
+using namespace llvm;

If you have to use the namespace you should  adopt the other lines below too

 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)  {
llvm::Argument *A = llvm::unwrap(val);
@@ -53,3 +55,23 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
   AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);  }
   
--Michael

+
+// MetadataAsValue uses a canonical format which strips the actual 
+MDNode for // MDNode with just a single constant value, storing just a 
+ConstantAsMetadata // This undoes this canonicalization, reconstructing the 
MDNode.
+static MDNode *extractMDNode(MetadataAsValue *MAV) {
+   Metadata *MD = MAV->getMetadata();
+   assert((isa(MD) || isa(MD)) &&
+  "Expected a metadata node or a canonicalized constant");
+
+   if (MDNode *N = dyn_cast(MD))
+   return N;
+   assert(0);
+   return MDNode::get(MAV->getContext(), MD); }
+
+void ac_metadata_point_op0_to_itself(LLVMValueRef v) {
+   MDNode *node = extractMDNode(unwrap(v));
+   node->replaceOperandWith(0, node);
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h 
index 1f37a12..0d6c53c 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -48,6 +48,7 @@ LLVMTargetMachineRef ac_create_target_machine(enum 
radeon_family family, bool su
 
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);  bool 
ac_is_sgpr_param(LLVMValueRef param);
+void ac_metadata_point_op0_to_itself(LLVMValueRef v);
 
 void
 ac_add_function_attr(LLVMValueRef function, diff --git 
a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 
9778581..d7a9a7b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3950,6 +3950,23 @@ static void visit_if(struct nir_to_llvm_context *ctx, 
nir_if *if_stmt)
LLVMPositionBuilderAtEnd(ctx->builder, merge_block);  }
 
+static void set_unroll_metadata(struct nir_to_llvm_context *ctx,
+  LLVMValueRef br)
+{
+   unsigned kind = LLVMGetMDKindIDInContext(ctx->context, "llvm.loop", 9);
+   LLVMValueRef md_unroll;
+   LLVMValueRef part_arg = LLVMMDStringInContext(ctx->context, 
"llvm.loop.unroll.count", 22);
+   LLVMValueRef count_arg = LLVMConstInt(ctx->i32, 32, false);
+   LLVMValueRef args[2] = {part_arg, count_arg};
+   LLVMValueRef count = LLVMMDNodeInContext(ctx->context, args, 2);
+
+   LLVMValueRef md_args[] = {NULL, count};
+   md_unroll = LLVMMDNodeInContext(ctx->context, md_args, 2);
+   ac_metadata_point_op0_to_itself(md_unroll);
+
+   LLVMSetMetadata(br, kind, md_unroll);
+}
+
 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)  {
LLVMBasicBlockRef continue_parent = ctx->continue_block; @@ -3964,8 
+3981,10 @@ static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop 
*loop)
LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
visit_cf_list(ctx, >body);
 
-   if (LLVMGetInsertBlock(ctx->builder))
-   LLVMBuildBr(ctx->builder, ctx->continue_block);
+   if (LLVMGetInsertBlock(ctx->builder)) {
+   LLVMValueRef loop = LLVMBuildBr(ctx->builder, 
ctx->continue_block);
+   set_unroll_metadata(ctx, loop);
+   }
LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
 
ctx->continue_block = continue_parent; @@ -4827,10 +4846,13 @@ static 
void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
 
/* Add some optimization passes */
LLVMAddScalarReplAggregatesPass(passmgr);
+   LLVMAddLoopRotatePass(passmgr);
LLVMAddLICMPass(passmgr);
LLVMAddAggressiveDCEPass(passmgr);
LLVMAddCFGSimplificationPass(passmgr);
LLVMAddInstructionCombiningPass(passmgr);
+   LLVMAddIndVarSimplifyPass(passmgr);
+   

Re: [Mesa-dev] [PATCH] intel/aubinator: Use the correct length for MEDIA commands

2016-11-22 Thread Michael Schellenberger Costa
Hi Jason,

this would be the same as case 3. Is that intentional and if so would you 
combine the cases?
--Michael

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Jason Ekstrand
Gesendet: Dienstag, 22. November 2016 03:59
An: mesa-dev@lists.freedesktop.org
Cc: Jason Ekstrand 
Betreff: [Mesa-dev] [PATCH] intel/aubinator: Use the correct length for MEDIA 
commands

---
 src/intel/tools/decoder.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/tools/decoder.c b/src/intel/tools/decoder.c index 
6bd02bf..55488eb 100644
--- a/src/intel/tools/decoder.c
+++ b/src/intel/tools/decoder.c
@@ -612,8 +612,8 @@ gen_group_get_length(struct gen_group *group, const 
uint32_t *p)
  return field(h, 0, 7) + 2;
   case 1:
  return 1;
-  case 2:
- return 2;
+  case 2: /* MEDIA */
+ return field(h, 0, 7) + 2;
   case 3:
  return field(h, 0, 7) + 2;
   }
--
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/18] anv/image: Memset all aux surfaces (not just HiZ) to 0

2016-10-30 Thread Michael Schellenberger Costa
Hi Jason,

the first word of the comment should be auxiliary?

--Michael

Am 28.10.2016 um 11:17 schrieb Jason Ekstrand:
> Signed-off-by: Jason Ekstrand 
> ---
>  src/intel/vulkan/anv_image.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 2104901..bdf8bca 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -293,7 +293,7 @@ VkResult anv_BindImageMemory(
>image->offset = 0;
> }
>  
> -   if (anv_image_has_hiz(image)) {
> +   if (image->aux_surface.isl.size > 0) {
>  
>/* The offset and size must be a multiple of 4K or else the
> * anv_gem_mmap call below will return NULL.
> @@ -301,9 +301,11 @@ VkResult anv_BindImageMemory(
>assert((image->offset + image->aux_surface.offset) % 4096 == 0);
>assert(image->aux_surface.isl.size % 4096 == 0);
>  
> -  /* HiZ surfaces need to have their memory cleared to 0 before they
> -   * can be used.  If we let it have garbage data, it can cause GPU
> -   * hangs on some hardware.
> +  /* Ausilizry surfaces need to have their memory cleared to 0 before 
> they
> +   * can be used.  For CCS surfaces, this puts them in the "resolved"
> +   * state so they can be used with CCS enabled before we ever touch it
> +   * from the GPU.  For HiZ, we need something valid or else we may get
> +   * GPU hangs on some hardware and 0 works fine.
> */
>void *map = anv_gem_mmap(device, image->bo->gem_handle,
> image->offset + image->aux_surface.offset,
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: optimize list handling in opt_dead_code

2016-10-18 Thread Michael Schellenberger Costa

Hi Jan,


On 18.10.2016 00:07, Jan Ziak wrote:

This patch replaces the ir_variable_refcount_entry's linked-list
with an array-list.

The array-list has local storage which does not require ANY additional
allocations if the list has small number of elements. The size of this
storage is configurable for each variable.

Benchmark results for "./run -1 shaders" from shader-db[1]:

- The total number of executed instructions goes down from 64.184 to 63.797
   giga-instructions when Mesa is compiled with "gcc -O0 ..."
- In the call tree starting at function do_dead_code():
   - the number of calls to malloc() is reduced by about 10%
   - the number of calls to free() is reduced by about 30%

[1] git://anongit.freedesktop.org/mesa/shader-db

Signed-off-by: Jan Ziak (http://atom-symbol.net) <0xe2.0x9a.0...@gmail.com>
---
  src/compiler/glsl/ir_variable_refcount.cpp |  14 +--
  src/compiler/glsl/ir_variable_refcount.h   |   8 +-
  src/compiler/glsl/opt_dead_code.cpp|  19 ++--
  src/util/fast_list.h   | 167 +
  4 files changed, 176 insertions(+), 32 deletions(-)

diff --git a/src/compiler/glsl/ir_variable_refcount.cpp 
b/src/compiler/glsl/ir_variable_refcount.cpp
index 8306be1..94d6edc 100644
--- a/src/compiler/glsl/ir_variable_refcount.cpp
+++ b/src/compiler/glsl/ir_variable_refcount.cpp
@@ -46,15 +46,6 @@ static void
  free_entry(struct hash_entry *entry)
  {
 ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) 
entry->data;
-
-   /* Free assignment list */
-   exec_node *n;
-   while ((n = ivre->assign_list.pop_head()) != NULL) {
-  struct assignment_entry *assignment_entry =
- exec_node_data(struct assignment_entry, n, link);
-  free(assignment_entry);
-   }
-
 delete ivre;
  }
  
@@ -142,10 +133,7 @@ ir_variable_refcount_visitor::visit_leave(ir_assignment *ir)

 */
assert(entry->referenced_count >= entry->assigned_count);
if (entry->referenced_count == entry->assigned_count) {
- struct assignment_entry *assignment_entry =
-(struct assignment_entry *)calloc(1, sizeof(*assignment_entry));
- assignment_entry->assign = ir;
- entry->assign_list.push_head(_entry->link);
+ entry->assign_list.add(ir);
}
 }
  
diff --git a/src/compiler/glsl/ir_variable_refcount.h b/src/compiler/glsl/ir_variable_refcount.h

index 08a11c0..c3ec5fe 100644
--- a/src/compiler/glsl/ir_variable_refcount.h
+++ b/src/compiler/glsl/ir_variable_refcount.h
@@ -32,11 +32,7 @@
  #include "ir.h"
  #include "ir_visitor.h"
  #include "compiler/glsl_types.h"
-
-struct assignment_entry {
-   exec_node link;
-   ir_assignment *assign;
-};
+#include "util/fast_list.h"
  
  class ir_variable_refcount_entry

  {
@@ -50,7 +46,7 @@ public:
  * This is intended to be used for dead code optimisation and may
  * not be a complete list.
  */
-   exec_list assign_list;
+   arraylist assign_list;
  
 /** Number of times the variable is referenced, including assignments. */

 unsigned referenced_count;
diff --git a/src/compiler/glsl/opt_dead_code.cpp 
b/src/compiler/glsl/opt_dead_code.cpp
index 75e668a..06e8c3d 100644
--- a/src/compiler/glsl/opt_dead_code.cpp
+++ b/src/compiler/glsl/opt_dead_code.cpp
@@ -52,7 +52,7 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
  
 struct hash_entry *e;

 hash_table_foreach(v.ht, e) {
-  ir_variable_refcount_entry *entry = (ir_variable_refcount_entry 
*)e->data;
+  ir_variable_refcount_entry *const entry = (ir_variable_refcount_entry 
*)e->data;
  
/* Since each assignment is a reference, the refereneced count must be

 * greater than or equal to the assignment count.  If they are equal,
@@ -89,7 +89,7 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
if (entry->var->data.always_active_io)
   continue;
  
-  if (!entry->assign_list.is_empty()) {

+  if (!entry->assign_list.empty()) {
 /* Remove all the dead assignments to the variable we found.
  * Don't do so if it's a shader or function output, though.
  */
@@ -98,26 +98,19 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
   entry->var->data.mode != ir_var_shader_out &&
   entry->var->data.mode != ir_var_shader_storage) {
  
-while (!entry->assign_list.is_empty()) {

-   struct assignment_entry *assignment_entry =
-  exec_node_data(struct assignment_entry,
- entry->assign_list.get_head_raw(), link);
-
-  assignment_entry->assign->remove();
-
+for(ir_assignment *assign : entry->assign_list) {
The original code separates control flow instructions as for or while 
with a space before the brace, aka "for (...". This applies for all the 
code.

+  assign->remove();
   if (debug) 

Re: [Mesa-dev] [PATCH 3/4] i965/blorp: Add a copy_miptrees helper

2016-09-12 Thread Michael Schellenberger Costa

Hi Jason,


On 09.09.2016 19:05, Jason Ekstrand wrote:

---
  src/mesa/drivers/dri/i965/brw_blorp.c | 71 +++
  src/mesa/drivers/dri/i965/brw_blorp.h | 10 +
  2 files changed, 81 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 193c7a4..626c33c 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -288,6 +288,23 @@ physical_to_logical_layer(struct intel_mipmap_tree *mt,
 }
  }
  
+static void

+miptree_check_level_logical_layer(struct intel_mipmap_tree *mt,
+  unsigned level,
+  unsigned logical_layer)
+{
+   if (mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
+   mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
+  const unsigned num_samples = MAX2(1, mt->num_samples);
+  for (unsigned s = 0; s < num_samples; s++) {
+ const unsigned physical_layer = (logical_layer * num_samples) + s;
+ intel_miptree_check_level_layer(mt, level, physical_layer);
+  }
+   } else {
+  intel_miptree_check_level_layer(mt, level, logical_layer);
+   }
+}
+
  /**
   * Note: if the src (or dst) is a 2D multisample array texture on Gen7+ using
   * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
@@ -392,6 +409,60 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
dst_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED;
  }
  
+void

+brw_blorp_copy_miptrees(struct brw_context *brw,
+struct intel_mipmap_tree *src_mt,
+unsigned src_level, unsigned src_layer,
+struct intel_mipmap_tree *dst_mt,
+unsigned dst_level, unsigned dst_layer,
+unsigned src_x, unsigned src_y,
+unsigned dst_x, unsigned dst_y,
+unsigned src_width, unsigned src_height)
+{
+   /* Get ready to blit.  This includes depth resolving the src and dst
+* buffers if necessary.  Note: it's not necessary to do a color resolve on
+* the destination buffer because we use the standard render path to render
+* to destination color buffers, and the standard render path is
+* fast-color-aware.
+*/
+   intel_miptree_resolve_color(brw, src_mt, INTEL_MIPTREE_IGNORE_CCS_E);
+   intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_layer);
+   intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_layer);
+
+   intel_miptree_prepare_mcs(brw, dst_mt);
+
+   DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d"
+   "to %dx %s mt %p %d %d (%d,%d)\n",
+   __func__,
+   src_mt->num_samples, _mesa_get_format_name(src_mt->format), src_mt,
+   src_level, src_layer, src_x, src_y, src_width, src_height,
+   dst_mt->num_samples, _mesa_get_format_name(dst_mt->format), dst_mt,
+   dst_level, dst_layer, dst_x, dst_y);

That seems like some leftover?
--Michael

+
+   miptree_check_level_logical_layer(src_mt, src_level, src_layer);
+   miptree_check_level_logical_layer(dst_mt, dst_level, dst_layer);
+   intel_miptree_used_for_rendering(dst_mt);
+
+   struct isl_surf tmp_surfs[4];
+   struct blorp_surf src_surf, dst_surf;
+   blorp_surf_for_miptree(brw, _surf, src_mt, false,
+  _level, _surfs[0]);
+   blorp_surf_for_miptree(brw, _surf, dst_mt, true,
+  _level, _surfs[2]);
+
+   struct blorp_batch batch;
+   blorp_batch_init(>blorp, , brw);
+   blorp_copy(, _surf, src_level, src_layer,
+  _surf, dst_level, dst_layer,
+  src_x, src_y, dst_x, dst_y, src_width, src_height);
+   blorp_batch_finish();
+
+   intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer);
+
+   if (intel_miptree_is_lossless_compressed(brw, dst_mt))
+  dst_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED;
+}
+
  static struct intel_mipmap_tree *
  find_miptree(GLbitfield buffer_bit, struct intel_renderbuffer *irb)
  {
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
b/src/mesa/drivers/dri/i965/brw_blorp.h
index afbf68f..abf3956 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -48,6 +48,16 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
  GLenum filter, bool mirror_x, bool mirror_y,
  bool decode_srgb, bool encode_srgb);
  
+void

+brw_blorp_copy_miptrees(struct brw_context *brw,
+struct intel_mipmap_tree *src_mt,
+unsigned src_level, unsigned src_logical_layer,
+struct intel_mipmap_tree *dst_mt,
+unsigned dst_level, unsigned dst_logical_layer,
+unsigned src_x, unsigned src_y,
+unsigned dst_x, unsigned dst_y,
+unsigned src_width, unsigned src_height);
+
  bool
  

Re: [Mesa-dev] [PATCH 3/3] nir/spirv: Use fill_common_atomic_sources for image atomics

2016-09-07 Thread Michael Schellenberger Costa
Hi Jason,

Am 07.09.2016 um 00:17 schrieb Jason Ekstrand:
> We had two almost identical copies of this code and they were both broken
> but in different ways.  The previous two commits fixed both of them.  This
> one just unifies them so that it's easier to handle in the future.
> 
> Signed-off-by: Jason Ekstrand 
> ---
>  src/compiler/spirv/spirv_to_nir.c | 99 
> +--
>  1 file changed, 43 insertions(+), 56 deletions(-)
> 
> diff --git a/src/compiler/spirv/spirv_to_nir.c 
> b/src/compiler/spirv/spirv_to_nir.c
> index 0d6a70e..e91a7b2 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -1589,6 +1589,47 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
> nir_builder_instr_insert(>nb, >instr);
>  }
>  
> +static void
> +fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
> +   const uint32_t *w, nir_src *src)
> +{
> +   switch (opcode) {
> +   case SpvOpAtomicIIncrement:
> +  src[0] = nir_src_for_ssa(nir_imm_int(>nb, 1));
> +  break;
> +
> +   case SpvOpAtomicIDecrement:
> +  src[0] = nir_src_for_ssa(nir_imm_int(>nb, -1));
> +  break;
> +
> +   case SpvOpAtomicISub:
> +  src[0] =
> + nir_src_for_ssa(nir_ineg(>nb, vtn_ssa_value(b, w[6])->def));
> +  break;
> +
> +   case SpvOpAtomicCompareExchange:
> +  src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
> +  src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
> +  break;
> +  /* Fall through */
That fall through seems wrong.
--Michael
> +
> +   case SpvOpAtomicExchange:
> +   case SpvOpAtomicIAdd:
> +   case SpvOpAtomicSMin:
> +   case SpvOpAtomicUMin:
> +   case SpvOpAtomicSMax:
> +   case SpvOpAtomicUMax:
> +   case SpvOpAtomicAnd:
> +   case SpvOpAtomicOr:
> +   case SpvOpAtomicXor:
> +  src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
> +  break;
> +
> +   default:
> +  unreachable("Invalid SPIR-V atomic");
> +   }
> +}
> +
>  static nir_ssa_def *
>  get_image_coord(struct vtn_builder *b, uint32_t value)
>  {
> @@ -1729,13 +1770,9 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
> case SpvOpImageWrite:
>intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
>break;
> +
> case SpvOpAtomicIIncrement:
> -  intrin->src[2] = nir_src_for_ssa(nir_imm_int(>nb, 1));
> -  break;
> case SpvOpAtomicIDecrement:
> -  intrin->src[2] = nir_src_for_ssa(nir_imm_int(>nb, -1));
> -  break;
> -
> case SpvOpAtomicExchange:
> case SpvOpAtomicIAdd:
> case SpvOpAtomicSMin:
> @@ -1745,16 +1782,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
> case SpvOpAtomicAnd:
> case SpvOpAtomicOr:
> case SpvOpAtomicXor:
> -  intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
> -  break;
> -
> -   case SpvOpAtomicCompareExchange:
> -  intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
> -  intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
> -  break;
> -
> -   case SpvOpAtomicISub:
> -  intrin->src[2] = nir_src_for_ssa(nir_ineg(>nb, vtn_ssa_value(b, 
> w[6])->def));
> +  fill_common_atomic_sources(b, opcode, w, >src[2]);
>break;
>  
> default:
> @@ -1829,47 +1857,6 @@ get_shared_nir_atomic_op(SpvOp opcode)
>  }
>  
>  static void
> -fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
> -   const uint32_t *w, nir_src *src)
> -{
> -   switch (opcode) {
> -   case SpvOpAtomicIIncrement:
> -  src[0] = nir_src_for_ssa(nir_imm_int(>nb, 1));
> -  break;
> -
> -   case SpvOpAtomicIDecrement:
> -  src[0] = nir_src_for_ssa(nir_imm_int(>nb, -1));
> -  break;
> -
> -   case SpvOpAtomicISub:
> -  src[0] =
> - nir_src_for_ssa(nir_ineg(>nb, vtn_ssa_value(b, w[6])->def));
> -  break;
> -
> -   case SpvOpAtomicCompareExchange:
> -  src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
> -  src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
> -  break;
> -  /* Fall through */
> -
> -   case SpvOpAtomicExchange:
> -   case SpvOpAtomicIAdd:
> -   case SpvOpAtomicSMin:
> -   case SpvOpAtomicUMin:
> -   case SpvOpAtomicSMax:
> -   case SpvOpAtomicUMax:
> -   case SpvOpAtomicAnd:
> -   case SpvOpAtomicOr:
> -   case SpvOpAtomicXor:
> -  src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
> -  break;
> -
> -   default:
> -  unreachable("Invalid SPIR-V atomic");
> -   }
> -}
> -
> -static void
>  vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
>   const uint32_t *w, unsigned count)
>  {
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] gallium: Fix incorrect unreachable call

2016-08-30 Thread Michael Schellenberger Costa
Signed-off-by: Michael Schellenberger Costa <mschellenbergerco...@gmail.com>
---
 src/gallium/state_trackers/nine/nine_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/nine/nine_shader.c 
b/src/gallium/state_trackers/nine/nine_shader.c
index bd373d7..4c58a6d 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -1898,7 +1898,7 @@ sm1_declusage_to_tgsi(struct tgsi_declaration_semantic 
*sem,
 sem->Index = 0;
 break;
 default:
-unreachable(!"Invalid DECLUSAGE.");
+unreachable("Invalid DECLUSAGE.");
 break;
 }
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa/st: Standardize some asserts to use !"foo"

2016-08-30 Thread Michael Schellenberger Costa
These are the final asserts in the tree that do not follow the pattern 
assert(!"foo"). To simplify spotting of (mostly) incorrect asserts via git grep 
"assert(\"" rewrite them into the standard form.

Signed-off-by: Michael Schellenberger Costa <mschellenbergerco...@gmail.com>
---
 src/mesa/state_tracker/st_atom_blend.c | 4 ++--
 src/mesa/state_tracker/st_atom_depth.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_blend.c 
b/src/mesa/state_tracker/st_atom_blend.c
index 65de67b..cdcba77 100644
--- a/src/mesa/state_tracker/st_atom_blend.c
+++ b/src/mesa/state_tracker/st_atom_blend.c
@@ -101,7 +101,7 @@ translate_blend(GLenum blend)
case GL_ONE_MINUS_SRC1_ALPHA:
   return PIPE_BLENDFACTOR_INV_SRC1_ALPHA;
default:
-  assert("invalid GL token in translate_blend()" == NULL);
+  assert(!"invalid GL token in translate_blend()");
   return 0;
}
 }
@@ -147,7 +147,7 @@ translate_logicop(GLenum logicop)
case GL_SET:
   return PIPE_LOGICOP_SET;
default:
-  assert("invalid GL token in translate_logicop()" == NULL);
+  assert(!"invalid GL token in translate_logicop()");
   return 0;
}
 }
diff --git a/src/mesa/state_tracker/st_atom_depth.c 
b/src/mesa/state_tracker/st_atom_depth.c
index 267b42c..e6f6f2f 100644
--- a/src/mesa/state_tracker/st_atom_depth.c
+++ b/src/mesa/state_tracker/st_atom_depth.c
@@ -90,7 +90,7 @@ gl_stencil_op_to_pipe(GLenum func)
case GL_INVERT:
   return PIPE_STENCIL_OP_INVERT;
default:
-  assert("invalid GL token in gl_stencil_op_to_pipe()" == NULL);
+  assert(!"invalid GL token in gl_stencil_op_to_pipe()");
   return 0;
}
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] glsl: Fix incorrect unreachable call

2016-08-30 Thread Michael Schellenberger Costa
Signed-off-by: Michael Schellenberger Costa <mschellenbergerco...@gmail.com>
---
 src/compiler/glsl/ir_print_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_print_visitor.cpp 
b/src/compiler/glsl/ir_print_visitor.cpp
index 0dd1c35..4cb8069 100644
--- a/src/compiler/glsl/ir_print_visitor.cpp
+++ b/src/compiler/glsl/ir_print_visitor.cpp
@@ -344,7 +344,7 @@ void ir_print_visitor::visit(ir_texture *ir)
   ir->lod_info.component->accept(this);
   break;
case ir_samples_identical:
-  unreachable(!"ir_samples_identical was already handled");
+  unreachable("ir_samples_identical was already handled");
};
fprintf(f, ")");
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] anv: Fix incorrect unreachable call

2016-08-30 Thread Michael Schellenberger Costa
Signed-off-by: Michael Schellenberger Costa <mschellenbergerco...@gmail.com>
---
 src/intel/vulkan/anv_meta_blit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c
index d2e375a..6f73102 100644
--- a/src/intel/vulkan/anv_meta_blit.c
+++ b/src/intel/vulkan/anv_meta_blit.c
@@ -300,7 +300,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
   pipeline = device->meta_state.blit.pipeline_3d_src;
   break;
default:
-  unreachable(!"bad VkImageType");
+  unreachable("bad VkImageType");
}
 
if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv: Fix incorrect assert in anv_wsi_wayland.c

2016-08-30 Thread Michael Schellenberger Costa
Signed-off-by: Michael Schellenberger Costa <mschellenbergerco...@gmail.com>
---
 src/intel/vulkan/anv_wsi_wayland.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_wsi_wayland.c 
b/src/intel/vulkan/anv_wsi_wayland.c
index a9e1617..d210e79 100644
--- a/src/intel/vulkan/anv_wsi_wayland.c
+++ b/src/intel/vulkan/anv_wsi_wayland.c
@@ -116,7 +116,7 @@ wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha)
 #endif
 
default:
-  assert("!Unsupported Vulkan format");
+  assert(!"Unsupported Vulkan format");
   return 0;
}
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] i965/vec4: make offset() operate in terms of channels instead of full registers

2016-08-23 Thread Michael Schellenberger Costa
Hi Iago,

given that the idea here was to unify vec4 and fs you might want to
adopt the names/function types accordingly.

In brw_ir_fs.h there is byte_offset that returns a fs_reg while you have
void add_byte_offset.

--Michael

Am 23.08.2016 um 10:24 schrieb Iago Toral Quiroga:
> This will make it more consistent with the FS implementation of the same
> helper and will provide more flexibility that will come in handy, for
> example, when we add a SIMD lowering pass in the vec4 backend.
> 
> v2:
>  - Move the switch statement to add_byte_offset (Iago)
>  - Remove the assert on the register file, it is redundant with the switch
>(Michael Schellenberger)
>  - Fix use of '=' instead of '==' (Michael Schellenberger,
>Francesco Ansanelli)
> ---
>  src/mesa/drivers/dri/i965/brw_ir_vec4.h | 36 
> +
>  1 file changed, 28 insertions(+), 8 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> index 81b6a13..d55b522 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> @@ -60,12 +60,33 @@ retype(src_reg reg, enum brw_reg_type type)
> return reg;
>  }
>  
> +static inline void
> +add_byte_offset(backend_reg *reg, unsigned delta)
> +{
> +   switch (reg->file) {
> +   case BAD_FILE:
> +  break;
> +   case MRF:
> +   case VGRF:
> +   case ATTR:
> +   case UNIFORM: {
> +  const unsigned suboffset = reg->subreg_offset + delta;
> +  reg->reg_offset += suboffset / REG_SIZE;
> +  reg->subreg_offset += suboffset % REG_SIZE;
> +  /* Align16 requires that register accesses are 16-byte aligned */
> +  assert(reg->subreg_offset % 16 == 0);
> +  break;
> +   }
> +   default:
> +  assert(delta == 0);
> +   }
> +}
> +
>  static inline src_reg
> -offset(src_reg reg, unsigned delta)
> +offset(src_reg reg, unsigned width, unsigned delta)
>  {
> -   assert(delta == 0 ||
> -  (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
> -   reg.reg_offset += delta;
> +   unsigned byte_offset = delta * width * type_sz(reg.type);
> +   add_byte_offset(, byte_offset);
> return reg;
>  }
>  
> @@ -130,11 +151,10 @@ retype(dst_reg reg, enum brw_reg_type type)
>  }
>  
>  static inline dst_reg
> -offset(dst_reg reg, unsigned delta)
> +offset(dst_reg reg, unsigned width, unsigned delta)
>  {
> -   assert(delta == 0 ||
> -  (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
> -   reg.reg_offset += delta;
> +   unsigned byte_offset = delta * width * type_sz(reg.type);
> +   add_byte_offset(, byte_offset);
> return reg;
>  }
>  
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] i965/vec4: make the offset() operate in terms of width and type

2016-08-22 Thread Michael Schellenberger Costa
Hi Iago,

Am 22.08.2016 um 11:53 schrieb Iago Toral Quiroga:
> This will make it more consistent with the FS implementation of the same
> helper and will provide more flexibility that will come in handy, for
> example, when we add a SIMD lowering pass in the vec4 backend.
> ---
>  src/mesa/drivers/dri/i965/brw_ir_vec4.h | 47 
> ++---
>  1 file changed, 43 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> index 81b6a13..058ffbb 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> @@ -60,13 +60,37 @@ retype(src_reg reg, enum brw_reg_type type)
> return reg;
>  }
>  
> +static inline void
> +add_byte_offset(backend_reg *reg, unsigned delta)
> +{
> +   const unsigned suboffset = reg->subreg_offset + delta;
> +   reg->reg_offset += suboffset / REG_SIZE;
> +   reg->subreg_offset += suboffset % REG_SIZE;
> +   assert(reg->subreg_offset % 16 == 0);
> +}
> +
>  static inline src_reg
> -offset(src_reg reg, unsigned delta)
> +offset(src_reg reg, unsigned width, unsigned delta)
>  {
> assert(delta == 0 ||
>(reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
> -   reg.reg_offset += delta;
Isnt that assert superfluous with the switch below, as you cover the
second condition in the explicit cases?
> +
> +   switch (reg.file) {
> +   case BAD_FILE:
> +  break;
> +   case MRF:
> +   case VGRF:
> +   case ATTR:
> +   case UNIFORM: {
> +  unsigned byte_offset = delta * width * type_sz(reg.type);
> +  add_byte_offset(, byte_offset);
> +  break;
> +   }
> +   default:
> +  assert(delta = 0);
delta == 0 ?

Same below.
--Michael
> +   }
> return reg;
> +
>  }
>  
>  /**
> @@ -130,12 +154,27 @@ retype(dst_reg reg, enum brw_reg_type type)
>  }
>  
>  static inline dst_reg
> -offset(dst_reg reg, unsigned delta)
> +offset(dst_reg reg, unsigned width, unsigned delta)
>  {
> assert(delta == 0 ||
>(reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
> -   reg.reg_offset += delta;
> +
> +   switch (reg.file) {
> +   case BAD_FILE:
> +  break;
> +   case MRF:
> +   case VGRF:
> +   case ATTR:
> +   case UNIFORM: {
> +  unsigned byte_offset = delta * width * type_sz(reg.type);
> +  add_byte_offset(, byte_offset);
> +  break;
> +   }
> +   default:
> +  assert(delta = 0);
> +   }
> return reg;
> +
>  }
>  
>  static inline dst_reg
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 12/34] i965/miptree: Add a helper for getting the aux isl_surf from a miptree

2016-07-05 Thread Michael Schellenberger Costa
Hi Jason,

Am 30.06.2016 um 01:22 schrieb Jason Ekstrand:
> v2: Switch on the usage when filling out formats
> 
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 119 
> ++
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |   5 ++
>  2 files changed, 124 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 8a746ec..6febb9a 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -3167,6 +3167,125 @@ intel_miptree_get_isl_surf(struct brw_context *brw,
> surf->usage = 0; /* TODO */
>  }
>  
> +/* WARNING: THE SURFACE CREATED BY THIS FUNCTION IS NOT COMPLETE AND CANNOT 
> BE
> + * USED FOR ANY REAL CALCULATIONS.  THE ONLY VALID USE OF SUCH A SURFACE IS 
> TO
> + * PASS IT INTO isl_surf_fill_state.
> + */
> +void
> +intel_miptree_get_aux_isl_surf(struct brw_context *brw,
> +   const struct intel_mipmap_tree *mt,
> +   struct isl_surf *surf,
> +   enum isl_aux_usage *usage)
> +{
> +   /* Much is the same as the regular surface */
> +   intel_miptree_get_isl_surf(brw, mt->mcs_mt, surf);
> +
> +   /* Figure out the layout */
> +   if (_mesa_get_format_base_format(mt->format) == GL_DEPTH_COMPONENT) {
> +  *usage = ISL_AUX_USAGE_HIZ;
> +   } else if (mt->num_samples > 1) {
> +  if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS)
> + *usage = ISL_AUX_USAGE_MCS;
> +  else
> + *usage = ISL_AUX_USAGE_NONE;
> +   } else if (intel_miptree_is_lossless_compressed(brw, mt)) {
> +  assert(brw->gen >= 9);
> +  *usage = ISL_AUX_USAGE_CCS_E;
> +   } else if (mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_NO_MCS) {
> +  *usage = ISL_AUX_USAGE_CCS_D;
> +   } else {
> +  /* Can we even get here? */
> +  *usage = ISL_AUX_USAGE_NONE;
> +   }
> +
> +   /* Figure out the format of the auxiliary surface */
> +   switch (*usage) {
> +   case ISL_AUX_USAGE_NONE:
> +  /* Can we even get here? */
> +  break;
> +
> +   case ISL_AUX_USAGE_HIZ:
> +  if (brw->gen >= 9) {
> + /* gen9+ uses the same size HiZ buffer regardless of multisampling 
> */
> + surf->format = ISL_FORMAT_GEN9_HIZ;
> +  } else {
> + switch (mt->num_samples) {
> + case 0:
> + case 1:  surf->format = ISL_FORMAT_GEN6_HIZ_1X;break;
> + case 2:  surf->format = ISL_FORMAT_GEN6_HIZ_2X;break;
> + case 4:  surf->format = ISL_FORMAT_GEN6_HIZ_4X;break;
> + case 8:  surf->format = ISL_FORMAT_GEN6_HIZ_8X;break;
> + default:
> +unreachable("Invalid number of samples");
> + }
> +  }
> +  break;
> +
> +   case ISL_AUX_USAGE_MCS:
> +  /*
> +   * From the SKL PRM:
> +   *"When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E,
> +   *HALIGN 16 must be used."
> +   */
That comment talks about CCS_D/E which is in the cases below. Shoudlnt
the first comment from below "When MCS is enabled for non-MSRT,
HALIGN_16 must be used" apply here?
--Michael

> +  if (brw->gen >= 9)
> + assert(mt->halign == 16);
> +
> +  switch (mt->num_samples) {
> +  case 2:  surf->format = ISL_FORMAT_MCS_2X;   break;
> +  case 4:  surf->format = ISL_FORMAT_MCS_4X;   break;
> +  case 8:  surf->format = ISL_FORMAT_MCS_8X;   break;
> +  case 16: surf->format = ISL_FORMAT_MCS_16X;  break;
> +  default:
> + unreachable("Invalid number of samples");
> +  }
> +  break;
> +
> +   case ISL_AUX_USAGE_CCS_D:
> +   case ISL_AUX_USAGE_CCS_E:
> +  /*
> +   * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
> +   *
> +   *"When MCS is enabled for non-MSRT, HALIGN_16 must be used"
> +   *
> +   * From the hardware spec for GEN9:
> +   *
> +   *"When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E,
> +   *HALIGN 16 must be used."
> +   */
> +  if (brw->gen >= 9 || mt->num_samples == 1)
> + assert(mt->halign == 16);
> +
> +  if (brw->gen >= 9) {
> + assert(mt->tiling == I915_TILING_Y);
> + switch (_mesa_get_format_bytes(mt->format)) {
> + case 4:  surf->format = ISL_FORMAT_GEN9_CCS_32BPP;   break;
> + case 8:  surf->format = ISL_FORMAT_GEN9_CCS_64BPP;   break;
> + case 16: surf->format = ISL_FORMAT_GEN9_CCS_128BPP;  break;
> + default:
> +unreachable("Invalid format size for color compression");
> + }
> +  } else if (mt->tiling == I915_TILING_Y) {
> + switch (_mesa_get_format_bytes(mt->format)) {
> + case 4:  surf->format = ISL_FORMAT_GEN7_CCS_32BPP_Y;break;
> + case 8:  surf->format = ISL_FORMAT_GEN7_CCS_64BPP_Y;break;
> + case 16: surf->format = 

Re: [Mesa-dev] [PATCH 1/6] radeonsi: extract IB and bo list saving into separate functions

2016-06-22 Thread Michael Schellenberger Costa
Hi Nicolai

Am 22.06.2016 um 11:40 schrieb Nicolai Hähnle:
> From: Nicolai Hähnle 
> 
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.c | 53 
> +++
>  src/gallium/drivers/radeon/r600_pipe_common.h | 12 ++
>  src/gallium/drivers/radeonsi/si_debug.c   | 39 +---
>  src/gallium/drivers/radeonsi/si_hw_context.c  | 25 +
>  src/gallium/drivers/radeonsi/si_pipe.c|  8 +---
>  src/gallium/drivers/radeonsi/si_pipe.h|  5 +--
>  6 files changed, 88 insertions(+), 54 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
> b/src/gallium/drivers/radeon/r600_pipe_common.c
> index fa9f70d..ee70a1a 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -302,6 +302,59 @@ static void r600_flush_dma_ring(void *ctx, unsigned 
> flags,
>   rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
>  }
>  
> +/**
> + * Store a linearized copy of all chunks of \p cs together with the buffer
> + * list in \p saved.
> + */
> +void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
> + struct radeon_saved_cs *saved)
> +{
> + void *buf;
> + unsigned i;
> +
> + /* Save the IB chunks. */
> + saved->num_dw = cs->prev_dw + cs->current.cdw;
> + saved->ib = MALLOC(4 * saved->num_dw);
> + if (!saved->ib)
> + goto oom;
> +
> + buf = saved->ib;
> + for (i = 0; i < cs->num_prev; ++i) {
> + memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
> + buf += cs->prev[i].cdw;
> + }
> + memcpy(buf, cs->current.buf, cs->current.cdw * 4);
> +
> + /* Save the buffer list. */
> + saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
> + saved->bo_list = CALLOC(saved->bo_count,
> + sizeof(saved->bo_list[0]));
> + if (!saved->bo_list) {
> + FREE(saved->ib);
> + goto oom;
> + }
> + ws->cs_get_buffer_list(cs, saved->bo_list);
> +
> + return;
> +
> +oom:
> + fprintf(stderr, "%s: out of memory\n", __func__);
> + memset(saved, 0, sizeof(*saved));
Is that Goto really worth it? It costs you one extra line of code and
obfuscates things.
--Michael

> +}
> +
> +void radeon_clear_saved_cs(struct radeon_saved_cs *saved)
> +{
> + unsigned i;
> +
> + FREE(saved->ib);
> +
> + for (i = 0; i < saved->bo_count; i++)
> + pb_reference(>bo_list[i].buf, NULL);
> + FREE(saved->bo_list);
> +
> + memset(saved, 0, sizeof(*saved));
> +}
> +
>  static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
>  {
>   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
> b/src/gallium/drivers/radeon/r600_pipe_common.h
> index fb6d1a5..a83908d 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -458,6 +458,15 @@ struct r600_ring {
> struct pipe_fence_handle **fence);
>  };
>  
> +/* Saved CS data for debugging features. */
> +struct radeon_saved_cs {
> + uint32_t*ib;
> + unsignednum_dw;
> +
> + struct radeon_bo_list_item  *bo_list;
> + unsignedbo_count;
> +};
> +
>  struct r600_common_context {
>   struct pipe_context b; /* base class */
>  
> @@ -623,6 +632,9 @@ const char *r600_get_llvm_processor_name(enum 
> radeon_family family);
>  void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
>struct r600_resource *dst, struct r600_resource *src);
>  void r600_dma_emit_wait_idle(struct r600_common_context *rctx);
> +void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
> + struct radeon_saved_cs *saved);
> +void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
>  
>  /* r600_gpu_load.c */
>  void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
> diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
> b/src/gallium/drivers/radeonsi/si_debug.c
> index b551c72..176a195 100644
> --- a/src/gallium/drivers/radeonsi/si_debug.c
> +++ b/src/gallium/drivers/radeonsi/si_debug.c
> @@ -508,7 +508,7 @@ static void si_dump_last_ib(struct si_context *sctx, FILE 
> *f)
>  {
>   int last_trace_id = -1;
>  
> - if (!sctx->last_ib)
> + if (!sctx->last_gfx.ib)
>   return;
>  
>   if (sctx->last_trace_buf) {
> @@ -533,11 +533,8 @@ static void si_dump_last_ib(struct si_context *sctx, 
> FILE *f)
>   sctx->init_config_gs_rings->ndw,
>   -1, "IB2: Init GS rings");
>  
> - si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
> + si_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
>   last_trace_id, "IB");
> - 

Re: [Mesa-dev] [PATCH 3/5] anv/descriptor_set: Ensure that bindings are always in increasing order

2016-06-06 Thread Michael Schellenberger Costa
Hi Jason,

Am 06/06/2016 um 20:26 schrieb Jason Ekstrand:
> Since applications are allowed to specify some set of bindings which need
> not be dense they also need not be in order.  

That sentence reads strange. "Need not be" sounds like must not. Dont
you mean "Do not need to be"?
--Michael

For most things, this doesn't
> matter, but it could result getting the wrong dynamic offsets. This adds a
> quick-and-dirty sort to ensure that everything is always in increasing
> order of binding index.
> 
> Signed-off-by: Jason Ekstrand 
> Cc: Kristian Høgsberg Kristensen 
> ---
>  src/intel/vulkan/anv_descriptor_set.c | 13 +
>  1 file changed, 13 insertions(+)
> 
> diff --git a/src/intel/vulkan/anv_descriptor_set.c 
> b/src/intel/vulkan/anv_descriptor_set.c
> index c977318..448ae0e 100644
> --- a/src/intel/vulkan/anv_descriptor_set.c
> +++ b/src/intel/vulkan/anv_descriptor_set.c
> @@ -89,6 +89,19 @@ VkResult anv_CreateDescriptorSetLayout(
> for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
>const VkDescriptorSetLayoutBinding *binding = 
> >pBindings[j];
>uint32_t b = binding->binding;
> +  /* We temporarily store the pointer to the binding in the
> +   * immutable_samplers pointer.  This provides us with a quick-and-dirty
> +   * way to sort the bindings by binding number.
> +   */
> +  set_layout->binding[b].immutable_samplers = (void *)binding;
> +   }
> +
> +   for (uint32_t b = 0; b <= max_binding; b++) {
> +  const VkDescriptorSetLayoutBinding *binding =
> + (void *)set_layout->binding[b].immutable_samplers;
> +
> +  if (binding == NULL)
> + continue;
>  
>assert(binding->descriptorCount > 0);
>  #ifndef NDEBUG
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] anv/pipeline: Store the (set, binding, index) tripple in the bind map

2016-06-06 Thread Michael Schellenberger Costa
Hi Jason

Am 06/06/2016 um 20:26 schrieb Jason Ekstrand:
> This way the the bind map (which we're caching) is mostly independent of
double the here
> the pipeline layout.  The only coupling remaining is that we pull the array
> size of a binding out of the layout.  However, that size is also specified
> in the shader and should always match so it's not really coupled.  This
missing solves/fixes
--Michael

> rendering issues in Dota 2.
> 
> Signed-off-by: Jason Ekstrand 
> Cc: Kristian Høgsberg Kristensen 
> ---
>  src/intel/vulkan/anv_cmd_buffer.c| 11 +++
>  src/intel/vulkan/anv_nir_apply_pipeline_layout.c |  7 ---
>  src/intel/vulkan/anv_pipeline.c  |  6 --
>  src/intel/vulkan/anv_private.h   | 11 +++
>  src/intel/vulkan/gen8_pipeline.c |  5 +++--
>  5 files changed, 25 insertions(+), 15 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
> b/src/intel/vulkan/anv_cmd_buffer.c
> index 3d37de2..5be5f3e 100644
> --- a/src/intel/vulkan/anv_cmd_buffer.c
> +++ b/src/intel/vulkan/anv_cmd_buffer.c
> @@ -809,9 +809,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer 
> *cmd_buffer,
>if (binding->set == ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) {
>   /* Color attachment binding */
>   assert(stage == MESA_SHADER_FRAGMENT);
> - if (binding->offset < subpass->color_count) {
> + assert(binding->binding == 0);
> + if (binding->index < subpass->color_count) {
>  const struct anv_image_view *iview =
> -   fb->attachments[subpass->color_attachments[binding->offset]];
> +   fb->attachments[subpass->color_attachments[binding->index]];
>  
>  assert(iview->color_rt_surface_state.alloc_size);
>  surface_state = iview->color_rt_surface_state;
> @@ -830,7 +831,8 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer 
> *cmd_buffer,
>  
>struct anv_descriptor_set *set =
>   cmd_buffer->state.descriptors[binding->set];
> -  struct anv_descriptor *desc = >descriptors[binding->offset];
> +  uint32_t offset = 
> set->layout->binding[binding->binding].descriptor_index;
> +  struct anv_descriptor *desc = >descriptors[offset + 
> binding->index];
>  
>switch (desc->type) {
>case VK_DESCRIPTOR_TYPE_SAMPLER:
> @@ -927,7 +929,8 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer 
> *cmd_buffer,
>struct anv_pipeline_binding *binding = >sampler_to_descriptor[s];
>struct anv_descriptor_set *set =
>   cmd_buffer->state.descriptors[binding->set];
> -  struct anv_descriptor *desc = >descriptors[binding->offset];
> +  uint32_t offset = 
> set->layout->binding[binding->binding].descriptor_index;
> +  struct anv_descriptor *desc = >descriptors[offset + 
> binding->index];
>  
>if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER &&
>desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
> diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 
> b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
> index 6481269..02991be 100644
> --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
> +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
> @@ -318,13 +318,13 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline 
> *pipeline,
>BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
>   set_layout->binding_count) {
>   unsigned array_size = set_layout->binding[b].array_size;
> - unsigned set_offset = set_layout->binding[b].descriptor_index;
>  
>   if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) 
> {
>  state.set[set].surface_offsets[b] = surface;
>  for (unsigned i = 0; i < array_size; i++) {
> map->surface_to_descriptor[surface + i].set = set;
> -   map->surface_to_descriptor[surface + i].offset = set_offset + 
> i;
> +   map->surface_to_descriptor[surface + i].binding = b;
> +   map->surface_to_descriptor[surface + i].index = i;
>  }
>  surface += array_size;
>   }
> @@ -333,7 +333,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline 
> *pipeline,
>  state.set[set].sampler_offsets[b] = sampler;
>  for (unsigned i = 0; i < array_size; i++) {
> map->sampler_to_descriptor[sampler + i].set = set;
> -   map->sampler_to_descriptor[sampler + i].offset = set_offset + 
> i;
> +   map->sampler_to_descriptor[sampler + i].binding = b;
> +   map->sampler_to_descriptor[sampler + i].index = i;
>  }
>  sampler += array_size;
>   }
> diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
> index cdbf60b..959fbbd 100644
> --- a/src/intel/vulkan/anv_pipeline.c
> +++ b/src/intel/vulkan/anv_pipeline.c
> 

Re: [Mesa-dev] [PATCH v2] i965: Fix point size with tessellation/geometry shaders in GLES.

2016-06-05 Thread Michael Schellenberger Costa
Hi Kenneth,

Am 03.06.2016 um 10:08 schrieb Kenneth Graunke:
> Our previous code worked for desktop GL, and ES without geometry or
> tessellation shaders.  But those features require fancier point size
> handling.  Fortunately, we can use one rule for all APIs.
> 
> Fixes a number of dEQP tests with EXT_tessellation_shader enabled:
> dEQP-GLES31.functional.tessellation_geometry_interaction.point_size.*
> 
> Cc: Ilia Mirkin 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_state.h | 49 
> +++
>  src/mesa/drivers/dri/i965/gen6_sf_state.c |  5 ++--
>  src/mesa/drivers/dri/i965/gen7_sf_state.c |  7 +++--
>  src/mesa/drivers/dri/i965/gen8_sf_state.c |  7 +++--
>  4 files changed, 59 insertions(+), 9 deletions(-)
> 
> Hey Ilia,
> 
> Thanks for helping me figure out these rules on IRC yesterday.
> I think this version should work...
> 
>  --Ken
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
> b/src/mesa/drivers/dri/i965/brw_state.h
> index 0a4c21f..be7f6ce 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -473,6 +473,55 @@ is_drawing_lines(const struct brw_context *brw)
> return false;
>  }
>  
> +static inline bool
> +use_state_point_size(const struct brw_context *brw)
> +{
> +   const struct gl_context *ctx = >ctx;
> +
> +   /* Section 14.4 (Points) of the OpenGL 4.5 specification says:
> +*
> +*"If program point size mode is enabled, the derived point size is
> +* taken from the (potentially clipped) shader built-in gl_PointSize
> +* written by:
> +*
> +** the geometry shader, if active;
> +** the tessellation evaluation shader, if active and no
> +*  geometry shader is active;
> +** the vertex shader, otherwise
> +*
> +*and clamped to the implementation-dependent point size range.  If
> +*the value written to gl_PointSize is less than or equal to zero,
> +*or if no value was written to gl_PointSize, results are undefined.
> +*If program point size mode is disabled, the derived point size is
> +*specified with the command
> +*
> +*   void PointSize(float size);
> +*
> +*size specifies the requested size of a point.  The default value
> +*is 1.0."
> +*
> +* The rules for GLES come from the ES 3.2, OES_geometry_point_size, and
> +* OES_tessellation_point_size specifications.  To summarize: if the last
> +* stage before rasterization is a GS or TES, then use gl_PointSize from
> +* the shader if written.  Otherwise, use 1.0.  If the last stage is a
> +* vertex shader, use gl_PointSize, or it is undefined.
> +*
> +* We can combine these rules into a single condition for both APIs.
> +* Using the state point size when the last shader stage doesn't write
> +* gl_PointSize satisfies GL's requirements, as it's undefined.  Because
> +* ES doesn't have a PointSize() command, the state point size will
> +* remain 1.0, satisfying the ES default value in the GS/TES case, and
> +* the VS case (1.0 works for "undefined").  Mesa sets the program point
> +* mode flag to always-enabled in ES, so we can safely check that, and
> +* it'll be ignored for ES.
> +*
> +* _NEW_PROGRAM | _NEW_POINT
> +* BRW_NEW_VUE_MAP_GEOM_OUT
> +*/
> +   return (!ctx->VertexProgram.PointSizeEnabled && !ctx->Point._Attenuated) 
> ||
> +  (brw->vue_map_geom_out.slots_valid & VARYING_BIT_PSIZ) == 0;
That last condition looks decidedly weird. Are those brackets correct?
--Michael
> +}
> +
>  
>  #ifdef __cplusplus
>  }
> diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
> b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> index 0538ab7..94731e0 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> @@ -373,9 +373,8 @@ upload_sf_state(struct brw_context *brw)
> if (multisampled_fbo && ctx->Multisample.Enabled)
>dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
>  
> -   /* _NEW_PROGRAM | _NEW_POINT */
> -   if (!(ctx->VertexProgram.PointSizeEnabled ||
> -  ctx->Point._Attenuated))
> +   /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
> +   if (use_state_point_size(brw))
>dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
>  
> /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
> diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c 
> b/src/mesa/drivers/dri/i965/gen7_sf_state.c
> index d3a658c..8d49e24 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
> @@ -213,8 +213,8 @@ upload_sf_state(struct brw_context *brw)
>  
> dw3 = GEN6_SF_LINE_AA_MODE_TRUE;
>  
> -   /* _NEW_PROGRAM | _NEW_POINT */
> -   if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated))
> +   /* _NEW_PROGRAM | _NEW_POINT, 

Re: [Mesa-dev] [PATCH 2/4] anv/apply_dynamic_offsets: Use rewrite_src instead of a regular assignment

2016-05-22 Thread Michael Schellenberger Costa
Hi Jason,

Am 19.05.2016 um 06:42 schrieb Jason Ekstrand:
> Originally we removed the instruction, changed the source, and then
> re-inserted it.  This works, but nir_instr_rewrite_src is a bit more
> obviously correct.
> ---
>  src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c 
> b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
> index 6a28953..a7ed3e5 100644
> --- a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
> +++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
> @@ -80,9 +80,10 @@ apply_dynamic_offsets_block(nir_block *block, nir_builder 
> *b,
>nir_ssa_dest_init(_load->instr, _load->dest, 2, 32, 
> NULL);
>nir_builder_instr_insert(b, _load->instr);
>  
> -  nir_src *offset_src = nir_get_io_offset_src(intrin);
I think you didnt want to remove *offset_src here, given that it is used
below and reintroduced in the next patch?
--Michael
> -  nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa,
> +  nir_ssa_def *new_offset = nir_iadd(b, nir_ssa_for_src(*offset_src),
>   _load->dest.ssa);
> +  nir_instr_rewrite_src(>instr, nir_get_io_offset_src(intrin),
> +nir_src_for_ssa(new_offset));
>  
>/* In order to avoid out-of-bounds access, we predicate */
>nir_ssa_def *pred = nir_uge(b, nir_channel(b, _load->dest.ssa, 
> 1),
> @@ -92,7 +93,6 @@ apply_dynamic_offsets_block(nir_block *block, nir_builder 
> *b,
>nir_cf_node_insert(b->cursor, _stmt->cf_node);
>  
>nir_instr_remove(>instr);
> -  *offset_src = nir_src_for_ssa(new_offset);
>nir_instr_insert_after_cf_list(_stmt->then_list, >instr);
>  
>if (intrin->intrinsic != nir_intrinsic_store_ssbo) {
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] glsl/linker: dvec3/dvec4 consume twice input vertex attributes

2016-05-22 Thread Michael Schellenberger Costa
Hi Juan,

Am 18.05.2016 um 13:19 schrieb Juan A. Suarez Romero:
> From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
> 
> "A program with more than the value of MAX_VERTEX_ATTRIBS
> active attribute variables may fail to link, unless
> device-dependent optimizations are able to make the program
> fit within available hardware resources. For the purposes
> of this test, attribute variables of the type dvec3, dvec4,
> dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
> count as consuming twice as many attributes as equivalent
> single-precision types. While these types use the same number
> of generic attributes as their single-precision equivalents,
> implementations are permitted to consume two single-precision
> vectors of internal storage for each three- or four-component
> double-precision vector."
> 
> This commits makes dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4,
> dmat4x3 and dmat4 consume twice as many attributes as equivalent
> single-precision types.
> 
> v3: count doubles as consuming two attributes (Dave Airlie)
> 
> Signed-off-by: Antia Puentes 
> Signed-off-by: Juan A. Suarez Romero 
> ---
>  src/compiler/glsl/linker.cpp | 19 +++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index f208d19..ad96a63 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -2873,6 +2873,25 @@ assign_attribute_or_color_locations(gl_shader_program 
> *prog,
>to_assign[i].var->data.location = generic_base + location;
>to_assign[i].var->data.is_unmatched_generic_inout = 0;
>used_locations |= (use_mask << location);
> +
> +  if (to_assign[i].var->type->without_array()->is_dual_slot_double())
> + double_storage_locations |= (use_mask << location);
> +   }
> +
> +   /* Now that we have all the locations, take in account that dvec3/4 can
> +* require twice the space of single-precision vectors. Check if we run 
> out
> +* of attribute slots.
> +*/
Would you mind citing the spec section from the commit message here
(only reference not the text)?
--Michael

> +   if (target_index == MESA_SHADER_VERTEX) {
> +  unsigned total_attribs_size =
> + _mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
> + _mesa_bitcount(double_storage_locations);
> +  if (total_attribs_size > max_index) {
> +  linker_error(prog,
> +   "attempt to use %d vertex attribute slots only %d 
> available ",
> +   total_attribs_size, max_index);
> +  return false;
> +  }
> }
>  
> return true;
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/13] vbo: Declare the index range invalid for DrawIndirect

2016-05-22 Thread Michael Schellenberger Costa
Hi Jason,

Am 19.05.2016 um 09:20 schrieb Jason Ekstrand:
> Right now, we're just setting the range to [0, MAX_UINT32] which, while
> correct isn't helpful.  With DrawIndirect, you can't really know what the
> actual range is so we may as well flag it as being an invalid range.  This
> is what we do for draws with index buffer which is similar (the indices
> aren't statically known) if a bit simpler.
> 
> Cc: "10.2" 
> ---
>  src/mesa/vbo/vbo_context.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
> index 9f807a1..ae5d265 100644
> --- a/src/mesa/vbo/vbo_context.c
> +++ b/src/mesa/vbo/vbo_context.c
> @@ -170,7 +170,7 @@ vbo_draw_indirect_prims(struct gl_context *ctx,
> }
>  
> vbo->draw_prims(ctx, prim, draw_count,
> -   ib, GL_TRUE, 0, ~0,
> +   ib, false, ~0, ~0,
Out of curiosity. The former was GL_TRUE so shouldn't you use GL_FALSE
here or do you plan to migrate that? If you do you might want to sed
them for that file?
--Michael
> NULL, 0,
> ctx->DrawIndirectBuffer);
>  
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/12] nir: Don't use ffma in nir_lower_wpos_ytransform().

2016-05-22 Thread Michael Schellenberger Costa
Hi Kenneth,

Am 19.05.2016 um 00:00 schrieb Kenneth Graunke:
> ffma is an explicitly fused multiply add with higher precision.
> The optimizer will take care of promoting mul/add to fma when
> it's beneficial to do so.
> 
> This fixes failures on Gen4-5 when using this pass, as those platforms
> don't actually implement fma().
I assume here you mean ffma()?
--Michael
> 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/compiler/nir/nir_lower_wpos_ytransform.c | 20 
>  1 file changed, 8 insertions(+), 12 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c 
> b/src/compiler/nir/nir_lower_wpos_ytransform.c
> index 36e25b9..ccf0fd3 100644
> --- a/src/compiler/nir/nir_lower_wpos_ytransform.c
> +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
> @@ -123,19 +123,15 @@ emit_wpos_adjustment(lower_wpos_ytransform_state *state,
>  * inversion/identity, or the other way around if we're drawing to an FBO.
>  */
> if (invert) {
> -  /* MAD wpos_temp.y, wpos_input, wpostrans., wpostrans.
> -   */
> -  wpos_temp_y = nir_ffma(b,
> - nir_channel(b, wpos_temp, 1),
> - nir_channel(b, wpostrans, 0),
> - nir_channel(b, wpostrans, 1));
> +  /* wpos_temp.y = wpos_input * wpostrans. + wpostrans. */
> +  wpos_temp_y = nir_fadd(b, nir_fmul(b, nir_channel(b, wpos_temp, 1),
> +nir_channel(b, wpostrans, 0)),
> +nir_channel(b, wpostrans, 1));
> } else {
> -  /* MAD wpos_temp.y, wpos_input, wpostrans., wpostrans.
> -   */
> -  wpos_temp_y = nir_ffma(b,
> - nir_channel(b, wpos_temp, 1),
> - nir_channel(b, wpostrans, 2),
> - nir_channel(b, wpostrans, 3));
> +  /* wpos_temp.y = wpos_input * wpostrans. + wpostrans. */
> +  wpos_temp_y = nir_fadd(b, nir_fmul(b, nir_channel(b, wpos_temp, 1),
> +nir_channel(b, wpostrans, 2)),
> +nir_channel(b, wpostrans, 3));
> }
>  
> wpos_temp = nir_vec4(b,
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/4] anv/pipeline: Bounds-check resource indices when robuts_buffer_access is enabled

2016-05-22 Thread Michael Schellenberger Costa
Hi Jason,

Am 19.05.2016 um 09:22 schrieb Jason Ekstrand:
> ---
>  src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 52 
> 
>  1 file changed, 35 insertions(+), 17 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 
> b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
> index 91f4322..7e66149 100644
> --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
> +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
> @@ -29,6 +29,9 @@ struct apply_pipeline_layout_state {
> nir_shader *shader;
> nir_builder builder;
>  
> +   struct anv_pipeline_layout *layout;
> +   bool add_bounds_checks;
> +
> struct {
>BITSET_WORD *used;
>uint8_t *surface_offsets;
> @@ -110,17 +113,15 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
> uint32_t binding = nir_intrinsic_binding(intrin);
>  
> uint32_t surface_index = state->set[set].surface_offsets[binding];
> +   uint32_t array_size =
> +  state->layout->set[set].layout->binding[binding].array_size;
>  
> -   nir_const_value *const_block_idx =
> -  nir_src_as_const_value(intrin->src[0]);
> +   nir_ssa_def *block_index = nir_ssa_for_src(b, intrin->src[0], 1);
>  
> -   nir_ssa_def *block_index;
> -   if (const_block_idx) {
> -  block_index = nir_imm_int(b, surface_index + const_block_idx->u32[0]);
> -   } else {
> -  block_index = nir_iadd(b, nir_imm_int(b, surface_index),
> - nir_ssa_for_src(b, intrin->src[0], 1));
> -   }
> +   if (state->add_bounds_checks)
> +  block_index = nir_umax(b, block_index, nir_imm_int(b, array_size - 1));
> +
> +   block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index);
Here you do
|   if (state->add_bounds_checks)
|   block_index = nir_umax(...);
|   block_index = nir_iadd(...);

Below you do
|   nir_ssa_def *index = iadd(...);
|   if (state->add_bounds_checks)
|   index = nir_umax(...);

Are both functionally equivalent? Also why do you one time do

| nir_ssa_def *block_index = nir_ssa_for_src()

and the other time put that directly into nir_iadd()?

Could you unify the code so that both cases are equivalent?
--Michael


>  
> assert(intrin->dest.is_ssa);
> nir_ssa_def_rewrite_uses(>dest.ssa, nir_src_for_ssa(block_index));
> @@ -129,16 +130,24 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
>  
>  static void
>  lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
> -unsigned *const_index, nir_tex_src_type src_type,
> +unsigned *const_index, unsigned array_size,
> +nir_tex_src_type src_type,
>  struct apply_pipeline_layout_state *state)
>  {
> +   nir_builder *b = >builder;
> +
> if (deref->deref.child) {
>assert(deref->deref.child->deref_type == nir_deref_type_array);
>nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
>  
> -  *const_index += deref_array->base_offset;
> -
>if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
> + nir_ssa_def *index =
> +nir_iadd(b, nir_imm_int(b, deref_array->base_offset),
> +nir_ssa_for_src(b, deref_array->indirect, 1));
> +
> + if (state->add_bounds_checks)
> +index = nir_umax(b, index, nir_imm_int(b, array_size - 1));
> +
>   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
> tex->num_srcs + 1);
>  
> @@ -154,10 +163,11 @@ lower_tex_deref(nir_tex_instr *tex, nir_deref_var 
> *deref,
>* first-class texture source.
>*/
>   tex->src[tex->num_srcs].src_type = src_type;
> + nir_instr_rewrite_src(>instr, >src[tex->num_srcs],
> +   nir_src_for_ssa(index));
>   tex->num_srcs++;
> - assert(deref_array->indirect.is_ssa);
> - nir_instr_rewrite_src(>instr, >src[tex->num_srcs - 1].src,
> -   deref_array->indirect);
> +  } else {
> + *const_index += MIN2(deref_array->base_offset, array_size - 1);
>}
> }
>  }
> @@ -182,17 +192,23 @@ lower_tex(nir_tex_instr *tex, struct 
> apply_pipeline_layout_state *state)
> /* No one should have come by and lowered it already */
> assert(tex->texture);
>  
> +   state->builder.cursor = nir_before_instr(>instr);
> +
> unsigned set = tex->texture->var->data.descriptor_set;
> unsigned binding = tex->texture->var->data.binding;
> +   unsigned array_size =
> +  state->layout->set[set].layout->binding[binding].array_size;
> tex->texture_index = state->set[set].surface_offsets[binding];
> -   lower_tex_deref(tex, tex->texture, >texture_index,
> +   lower_tex_deref(tex, tex->texture, >texture_index, array_size,
> nir_tex_src_texture_offset, state);
>  
> if (tex->sampler) {
>unsigned set = tex->sampler->var->data.descriptor_set;
> 

Re: [Mesa-dev] [PATCH 1/2] i965: Reduce the SIMD8 GS push constant threshold from 32 to 24.

2016-05-10 Thread Michael Schellenberger Costa
Hi Kenneth,

can you update the comment too?

Michael

Am 10.05.2016 um 09:46 schrieb Kenneth Graunke:
> Three Shadow of Mordor geometry shaders increase by a single
> instruction, but the number of spills/fills in Orbital Explorer
> is reduced from 194:1279 -> 82:454.  No other programs are affected.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index ce3e00b..332e382 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -5155,7 +5155,7 @@ fs_visitor::setup_gs_payload()
> }
>  
> /* Use a maximum of 32 registers for push-model inputs. */
> -   const unsigned max_push_components = 32;
> +   const unsigned max_push_components = 24;
>  
> /* If pushing our inputs would take too many registers, reduce the URB 
> read
>  * length (which is in HWords, or 8 registers), and resort to pulling.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCHv2 10/23] i965/fs: Stop using the LOAD_PAYLOAD instruction in lower_simd_width.

2016-05-04 Thread Michael Schellenberger Costa
Hi Curro,

Am 04.05.2016 um 06:26 schrieb Francisco Jerez:
> Instead of using the LOAD_PAYLOAD instruction (emitted through the
> emit_transpose() helper that is no longer useful and this commit
> removes) which had to be marked force_writemask_all in some cases,
> emit a series of moves to apply proper channel enable signals to the
> destination.  Until now lower_simd_width() had mainly been used to
> lower things that invariably had a basic block-local temporary as
> destination so it didn't seem like a big deal, but I found it to be
> the reason for several Piglit regressions in my SIMD32 branch and
> Igalia discovered the same issue independently while working on FP64
> support.
> ---
> This is taken from the following WIP series:
>   https://cgit.freedesktop.org/~currojerez/mesa/log/?h=i965-late-simd-lowering
>
> See also:
>   https://lists.freedesktop.org/archives/mesa-dev/2016-May/115596.html
>
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 58 
> +++-
>  1 file changed, 18 insertions(+), 40 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 4b6aa67..34599cb 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -4625,29 +4625,6 @@ get_lowered_simd_width(const struct brw_device_info 
> *devinfo,
> }
>  }
>  
> -/**
> - * The \p rows array of registers represents a \p num_rows by \p num_columns
> - * matrix in row-major order, write it in column-major order into the 
> register
> - * passed as destination.  \p stride gives the separation between matrix
> - * elements in the input in fs_builder::dispatch_width() units.
> - */
> -static void
> -emit_transpose(const fs_builder ,
> -   const fs_reg , const fs_reg *rows,
> -   unsigned num_rows, unsigned num_columns, unsigned stride)
> -{
> -   fs_reg *const components = new fs_reg[num_rows * num_columns];
> -
> -   for (unsigned i = 0; i < num_columns; ++i) {
> -  for (unsigned j = 0; j < num_rows; ++j)
> - components[num_rows * i + j] = offset(rows[j], bld, stride * i);
> -   }
> -
> -   bld.LOAD_PAYLOAD(dst, components, num_rows * num_columns, 0);
> -
> -   delete[] components;
> -}
> -
>  bool
>  fs_visitor::lower_simd_width()
>  {
> @@ -4698,16 +4675,19 @@ fs_visitor::lower_simd_width()
> if (inst->src[j].file != BAD_FILE &&
> !is_uniform(inst->src[j])) {
>/* Get the i-th copy_width-wide chunk of the source. */
> -  const fs_reg src = horiz_offset(inst->src[j], copy_width * 
> i);
> +  const fs_builder cbld = lbld.group(copy_width, 0);
> +  const fs_reg src = offset(inst->src[j], cbld, i);
>const unsigned src_size = inst->components_read(j);
>  
> -  /* Use a trivial transposition to copy one every n
> -   * copy_width-wide components of the register into a
> -   * temporary passed as source to the lowered instruction.
> +  /* Copy one every n copy_width-wide components of the
That first sentence is really unclear. Shouldnt it sound "Copy each
copy_width-wide component of..."

Michael
> +   * register into a temporary passed as source to the 
> lowered
> +   * instruction.
> */
>split_inst.src[j] = lbld.vgrf(inst->src[j].type, src_size);
> -  emit_transpose(lbld.group(copy_width, 0),
> - split_inst.src[j], , 1, src_size, n);
> +
> +  for (unsigned k = 0; k < src_size; ++k)
> + cbld.MOV(offset(split_inst.src[j], lbld, k),
> +  offset(src, cbld, n * k));
> }
>  }
>  
> @@ -4726,20 +4706,18 @@ fs_visitor::lower_simd_width()
>   }
>  
>   if (inst->regs_written) {
> -/* Distance between useful channels in the temporaries, skipping
> - * garbage if the lowered instruction is wider than the original.
> - */
> -const unsigned m = lower_width / copy_width;
> +const fs_builder lbld = ibld.group(lower_width, 0);
>  
>  /* Interleave the components of the result from the lowered
> - * instructions.  We need to set exec_all() when copying more 
> than
> - * one half per component, because LOAD_PAYLOAD (in terms of 
> which
> - * emit_transpose is implemented) can only use the same channel
> - * enable signals for all of its non-header sources.
> + * instructions.
>   */
> -emit_transpose(ibld.exec_all(inst->exec_size > copy_width)
> -   .group(copy_width, 0),
> -   inst->dst, dsts, n, dst_size, m);
> +for (unsigned i = 0; i < dst_size; ++i) {
> +   

Re: [Mesa-dev] [PATCH 02/18] anv: Add a new block-based batch emit macro

2016-04-19 Thread Michael Schellenberger Costa
Hi Jason,

stupid optional nitpick , but could you go for anv_batch_emit_block?
Normally abbreviations below 3 characters aren't really worth it.

Michael

Am 19/04/2016 um 02:10 schrieb Jason Ekstrand:
> This new macro uses a for loop to create an actual code block in which to
> place the macro setup code.  One advantage of this is that you syntatically
> use braces instead of parentheses.  Another is that the code in the block
> doesn't even get executed if anv_batch_emit_dwords fails.
> ---
>  src/intel/vulkan/anv_private.h | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index ae2e08d..d59b7ed 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -861,6 +861,15 @@ __gen_combine_address(struct anv_batch *batch, void 
> *location,
>VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
> } while (0)
>  
> +#define anv_batch_emit_blk(batch, cmd, name)\
> +   for (struct cmd name = { __anv_cmd_header(cmd) },\
> +*_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));\
> +__builtin_expect(_dst != NULL, 1);  \
> +({ __anv_cmd_pack(cmd)(batch, _dst, ); \
> +   VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 
> 4)); \
> +   _dst = NULL; \
> + }))
> +
>  #define anv_state_pool_emit(pool, cmd, align, ...) ({   \
>const uint32_t __size = __anv_cmd_length(cmd) * 4;\
>struct anv_state __state =\
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] radeonsi: Set range metadata on calls to llvm.SI.tid

2016-04-19 Thread Michael Schellenberger Costa
Hi Tom,

Am 19.04.2016 um 19:52 schrieb Tom Stellard:
> The range metadata tells LLVM the range of expected values for this intrinsic,
> so it can do some additional optimizations on the result.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 29 ++---
>  1 file changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 3b6d6e9..b4f2a42 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1114,12 +1114,35 @@ static LLVMValueRef get_sample_id(struct 
> radeon_llvm_context *radeon_bld)
>   SI_PARAM_ANCILLARY, 8, 4);
>  }
>  
> +/**
> + * Set range metadata on an instruction.  This can only be used on load and
> + * call instructions.  To specify an instruciton can only produce the values
> + * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
> + * \p lo is the minimum value inclusive.
> + * \p hi is the maximum value exclusive.
> + */
> +static void set_range_metadata(LLVMValueRef value, unsigned lo, unsigned hi)
> +{
> + const char *range_md_string = "range";
> + LLVMValueRef range_md, md_args[2];
> + LLVMTypeRef type = LLVMTypeOf(value);
> + LLVMContextRef context = LLVMGetTypeContext(type);
> + unsigned md_range_id = LLVMGetMDKindIDInContext(context,
> + range_md_string, strlen(range_md_string));
> +
> + md_args[0] = LLVMConstInt(type, lo, false);
> + md_args[1] = LLVMConstInt(type, hi, false);
> + range_md = LLVMMDNodeInContext(context, md_args, 2);
> + LLVMSetMetadata(value, md_range_id, range_md);
> +}
> +
>  static LLVMValueRef get_thread_id(struct si_shader_context *ctx)
>  {
>   struct gallivm_state *gallivm = >radeon_bld.gallivm;
> -
> - return lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", ctx->i32,
> -NULL, 0, LLVMReadNoneAttribute);
> + LLVMValueRef tid = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid",
> + ctx->i32,   NULL, 0, LLVMReadNoneAttribute);

same here, why not use the helper from patch 1?
Michael
> + set_range_metadata(tid, 0, 64);
> + return tid;
>  }
>  
>  /**

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] radeonsi: Use llvm.amdgcn.mbcnt.* intrinsics instead of llvm.SI.tid v2

2016-04-19 Thread Michael Schellenberger Costa
Hi Tom

Am 19.04.2016 um 19:52 schrieb Tom Stellard:
> We're trying to move to more of the new style intrinsics with include
> the correct target name, and map directly to ISA instructions.
>
> v2:
>   - Only do this with LLVM 3.8 and newer.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 17 -
>  1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index b4f2a42..2a747f9 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1139,8 +1139,23 @@ static void set_range_metadata(LLVMValueRef value, 
> unsigned lo, unsigned hi)
>  static LLVMValueRef get_thread_id(struct si_shader_context *ctx)
>  {
>   struct gallivm_state *gallivm = >radeon_bld.gallivm;
> - LLVMValueRef tid = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid",
> + LLVMValueRef tid;
> +
> + if (HAVE_LLVM < 0x0308) {
> + tid = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid",
>   ctx->i32,   NULL, 0, LLVMReadNoneAttribute);
Is there a reason not to use the helper from patch 1?
Michael
> + } else {
> + LLVMValueRef tid_args[2];
> + tid_args[0] = lp_build_const_int32(gallivm, 0x);
> + tid_args[1] = lp_build_const_int32(gallivm, 0);
> + tid_args[1] = lp_build_intrinsic(gallivm->builder,
> + "llvm.amdgcn.mbcnt.lo", ctx->i32,
> + tid_args, 2, LLVMReadNoneAttribute);
> +
> + tid = lp_build_intrinsic(gallivm->builder,
> + "llvm.amdgcn.mbcnt.hi", ctx->i32,
> + tid_args, 2, LLVMReadNoneAttribute);
> + }
>   set_range_metadata(tid, 0, 64);
>   return tid;
>  }

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] i965/vec4: Move can_do_writemask to vec4_instruction

2016-04-06 Thread Michael Schellenberger Costa
Hi Jason,

Am 06.04.2016 um 06:11 schrieb Jason Ekstrand:
> ---
>  src/mesa/drivers/dri/i965/brw_ir_vec4.h|  1 +
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 28 +++
>  .../dri/i965/brw_vec4_dead_code_eliminate.cpp  | 31 
> +-
>  3 files changed, 30 insertions(+), 30 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> index 2b6872e..81b6a13 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> @@ -172,6 +172,7 @@ public:
>int swizzle, int swizzle_mask);
> void reswizzle(int dst_writemask, int swizzle);
> bool can_do_source_mods(const struct brw_device_info *devinfo);
> +   bool can_do_writemask(const struct brw_device_info *devinfo);
> bool can_change_types() const;
> bool has_source_and_destination_hazard() const;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 0025343..4d0efa8 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -239,6 +239,34 @@ vec4_instruction::can_do_source_mods(const struct 
> brw_device_info *devinfo)
>  }
>  
>  bool
> +vec4_instruction::can_do_writemask(const struct brw_device_info *devinfo)
> +{
> +   switch (opcode) {
> +   case SHADER_OPCODE_GEN4_SCRATCH_READ:
> +   case VS_OPCODE_PULL_CONSTANT_LOAD:
> +   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
> +   case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
Can you order the stages alphabetically here, that improves readability
a lot?
Michael
> +   case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
> +   case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
> +   case TES_OPCODE_CREATE_INPUT_READ_HEADER:
> +   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
> +   case VEC4_OPCODE_URB_READ:
> +  return false;
> +   default:
> +  /* The MATH instruction on Gen6 only executes in align1 mode, which 
> does
> +   * not support writemasking.
> +   */
> +  if (devinfo->gen == 6 && is_math())
> + return false;
> +
> +  if (is_tex())
> + return false;
> +
> +  return true;
> +   }
> +}
> +
> +bool
>  vec4_instruction::can_change_types() const
>  {
> return dst.type == src[0].type &&
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> index 166bc17..c643212 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> @@ -36,35 +36,6 @@
>  
>  using namespace brw;
>  
> -static bool
> -can_do_writemask(const struct brw_device_info *devinfo,
> - const vec4_instruction *inst)
> -{
> -   switch (inst->opcode) {
> -   case SHADER_OPCODE_GEN4_SCRATCH_READ:
> -   case VS_OPCODE_PULL_CONSTANT_LOAD:
> -   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
> -   case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
> -   case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
> -   case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
> -   case TES_OPCODE_CREATE_INPUT_READ_HEADER:
> -   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
> -   case VEC4_OPCODE_URB_READ:
> -  return false;
> -   default:
> -  /* The MATH instruction on Gen6 only executes in align1 mode, which 
> does
> -   * not support writemasking.
> -   */
> -  if (devinfo->gen == 6 && inst->is_math())
> - return false;
> -
> -  if (inst->is_tex())
> - return false;
> -
> -  return true;
> -   }
> -}
> -
>  bool
>  vec4_visitor::dead_code_eliminate()
>  {
> @@ -101,7 +72,7 @@ vec4_visitor::dead_code_eliminate()
>  /* If the instruction can't do writemasking, then it's all or
>   * nothing.
>   */
> -if (!can_do_writemask(devinfo, inst)) {
> +if (!inst->can_do_writemask(devinfo)) {
> bool result = result_live[0] | result_live[1] |
>   result_live[2] | result_live[3];
> result_live[0] = result;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] i965/vec4: Handle MOV_INDIRECT in pack_uniform_registers

2016-04-06 Thread Michael Schellenberger Costa
Hi Jason,

Am 06.04.2016 um 06:11 schrieb Jason Ekstrand:
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 18 ++
>  1 file changed, 18 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index e2aa109..6433fc5 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -574,6 +574,24 @@ vec4_visitor::pack_uniform_registers()
> BRW_GET_SWZ(inst->src[i].swizzle, c) + 1);
>   }
>}
> +
> +  if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
> +  inst->src[0].file == UNIFORM) {
> + assert(inst->src[2].file == BRW_IMMEDIATE_VALUE);
> + assert(inst->src[0].subnr == 0);
> +
> + unsigned bytes_read = inst->src[2].ud;
> + assert(bytes_read % 4 == 0);
> + unsigned vec4s_read = DIV_ROUND_UP(bytes_read, 16);
> +
> + /* We just mark every register touched by a MOV_INDIRECT as being
> +  * fully used.  This ensures that it doesn't broken up piecewise by
There is an additional whitespace before This and "it doesn't broken up
" needs some love. Maybe "it doesnt get broken up"?
Michael
> +  * the next part of our packing algorithm.
> +  */
> + int reg = inst->src[0].nr;
> + for (unsigned i = 0; i < vec4s_read; i++)
> +chans_used[reg + i] = 4;
> +  }
> }
>  
> int new_uniform_count = 0;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/41] glapi: clean imports in python files

2016-04-01 Thread Michael Schellenberger Costa
Hi,

minor nitpicks wrt ordering below

Am 01.04.2016 um 02:04 schrieb Dylan Baker:
> Completely clean the imports:
> - Split so that one module is imported per line
> - Remove unused imports
> - Group stdlib imports, then 3rd party modules, and finally local
>   modules
> - sort alphabetically within those groups
>
> Signed-off-by: Dylan Baker 
> ---
>  src/mapi/glapi/gen/glX_XML.py  | 1 -
>  src/mapi/glapi/gen/glX_proto_common.py | 4 +++-
>  src/mapi/glapi/gen/glX_proto_recv.py   | 4 +++-
>  src/mapi/glapi/gen/glX_proto_send.py   | 9 +++--
>  src/mapi/glapi/gen/glX_proto_size.py   | 5 +++--
>  src/mapi/glapi/gen/glX_server_table.py | 5 -
>  src/mapi/glapi/gen/gl_SPARC_asm.py | 3 ++-
>  src/mapi/glapi/gen/gl_XML.py   | 8 +---
>  src/mapi/glapi/gen/gl_apitemp.py   | 3 ++-
>  src/mapi/glapi/gen/gl_enums.py | 7 +++
>  src/mapi/glapi/gen/gl_genexec.py   | 6 +++---
>  src/mapi/glapi/gen/gl_gentable.py  | 3 ++-
>  src/mapi/glapi/gen/gl_x86-64_asm.py| 3 ++-
>  src/mapi/glapi/gen/gl_x86_asm.py   | 3 ++-
>  src/mapi/glapi/gen/remap_helper.py | 2 +-
>  src/mapi/glapi/gen/typeexpr.py | 4 +++-
>  16 files changed, 45 insertions(+), 25 deletions(-)
>
> diff --git a/src/mapi/glapi/gen/glX_XML.py b/src/mapi/glapi/gen/glX_XML.py
> index 12ff291..6987e27 100644
> --- a/src/mapi/glapi/gen/glX_XML.py
> +++ b/src/mapi/glapi/gen/glX_XML.py
> @@ -27,7 +27,6 @@
>  
>  import gl_XML
>  import license
> -import sys, getopt, string
>  
>  
>  class glx_item_factory(gl_XML.gl_item_factory):
> diff --git a/src/mapi/glapi/gen/glX_proto_common.py 
> b/src/mapi/glapi/gen/glX_proto_common.py
> index ae2c2d5..bcfe009 100644
> --- a/src/mapi/glapi/gen/glX_proto_common.py
> +++ b/src/mapi/glapi/gen/glX_proto_common.py
> @@ -25,9 +25,11 @@
>  # Authors:
>  #Ian Romanick 
>  
> -import gl_XML, glX_XML
>  import string
>  
> +import gl_XML
> +import glX_XML
You majoritely put glX_* first in the rest of the patch. maybe here too?
> +
>  
>  class glx_proto_item_factory(glX_XML.glx_item_factory):
>  """Factory to create GLX protocol oriented objects derived from 
> gl_item."""
> diff --git a/src/mapi/glapi/gen/glX_proto_recv.py 
> b/src/mapi/glapi/gen/glX_proto_recv.py
> index afee388..ec3f7d7 100644
> --- a/src/mapi/glapi/gen/glX_proto_recv.py
> +++ b/src/mapi/glapi/gen/glX_proto_recv.py
> @@ -28,7 +28,9 @@
>  import argparse
>  import string
>  
> -import gl_XML, glX_XML, glX_proto_common, license
> +import glX_proto_common
> +import gl_XML
glX_XML is missing here, is that intentional?
> +import license
>  
>  
>  class PrintGlxDispatch_h(gl_XML.gl_print_base):
> diff --git a/src/mapi/glapi/gen/glX_proto_send.py 
> b/src/mapi/glapi/gen/glX_proto_send.py
> index 2b33030..5904b33 100644
> --- a/src/mapi/glapi/gen/glX_proto_send.py
> +++ b/src/mapi/glapi/gen/glX_proto_send.py
> @@ -28,9 +28,14 @@
>  #Jeremy Kolb 
>  
>  import argparse
> +import copy
> +import string
> +
> +import gl_XML
> +import glX_XML
> +import glX_proto_common
> +import license
>  
> -import gl_XML, glX_XML, glX_proto_common, license
> -import copy, string
>  
>  def convertStringForXCB(str):
>  tmp = ""
> diff --git a/src/mapi/glapi/gen/glX_proto_size.py 
> b/src/mapi/glapi/gen/glX_proto_size.py
> index 75fc26f..b008e9d 100644
> --- a/src/mapi/glapi/gen/glX_proto_size.py
> +++ b/src/mapi/glapi/gen/glX_proto_size.py
> @@ -26,9 +26,10 @@
>  #Ian Romanick 
>  
>  import argparse
> -import sys, string
> +import string
>  
> -import gl_XML, glX_XML
> +import glX_XML
> +import gl_XML
>  import license
>  
>  
> diff --git a/src/mapi/glapi/gen/glX_server_table.py 
> b/src/mapi/glapi/gen/glX_server_table.py
> index 2d21f4e..5e996eb 100644
> --- a/src/mapi/glapi/gen/glX_server_table.py
> +++ b/src/mapi/glapi/gen/glX_server_table.py
> @@ -27,7 +27,10 @@
>  
>  import argparse
>  
> -import gl_XML, glX_XML, glX_proto_common, license
> +import glX_XML
> +import glX_proto_common
> +import gl_XML
Ordering glX_proto_common first
--Michael
> +import license
>  
>  
>  def log2(value):
> diff --git a/src/mapi/glapi/gen/gl_SPARC_asm.py 
> b/src/mapi/glapi/gen/gl_SPARC_asm.py
> index fa6217e..89bccef 100644
> --- a/src/mapi/glapi/gen/gl_SPARC_asm.py
> +++ b/src/mapi/glapi/gen/gl_SPARC_asm.py
> @@ -27,8 +27,9 @@
>  
>  import argparse
>  
> +import glX_XML
> +import gl_XML
>  import license
> -import gl_XML, glX_XML
>  
>  class PrintGenericStubs(gl_XML.gl_print_base):
>  def __init__(self):
> diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py
> index 2e7123e..8c622aa 100644
> --- a/src/mapi/glapi/gen/gl_XML.py
> +++ b/src/mapi/glapi/gen/gl_XML.py
> @@ -26,11 +26,13 @@
>  #Ian Romanick 
>  
>  from decimal import Decimal
> -import xml.etree.ElementTree as ET
> -import re, sys, string
>  import os.path
> -import typeexpr
> +import re
> +import string
> +import 

Re: [Mesa-dev] [PATCH 01/12] nvc0: allocate an area for compute user constbufs

2016-02-07 Thread Michael Schellenberger Costa
Hi,

Am 06/02/2016 um 23:38 schrieb Samuel Pitoiset:
> For compute shaders, we might need to upload uniforms.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 14 +++---
>  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 12 ++--
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c|  2 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c| 10 ++
>  4 files changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 2b12de4..84e4253 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -889,7 +889,7 @@ nvc0_screen_create(struct nouveau_device *dev)
>  */
> nouveau_heap_init(>text_heap, 0, (1 << 20) - 0x100);
>  
> -   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(>base), 1 << 12, 6 << 
> 16, NULL,
> +   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(>base), 1 << 12, 7 << 
> 16, NULL,
>  >uniform_bo);

There aren't any enums for those magic numbers here and below?

> if (ret)
>goto fail;
> @@ -901,8 +901,8 @@ nvc0_screen_create(struct nouveau_device *dev)
>/* auxiliary constants (6 user clip planes, base instance id) */
>BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
>PUSH_DATA (push, 1024);
> -  PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
> -  PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
> +  PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
> +  PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
The pattern (N << 16) + (M << 10)) seems repetitive, would a helper make
sense here (Might help to avoid the magic numbers)?

Michael

>BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
>PUSH_DATA (push, (15 << 4) | 1);
>if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
> @@ -922,8 +922,8 @@ nvc0_screen_create(struct nouveau_device *dev)
> /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
> BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> PUSH_DATA (push, 256);
> -   PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
> -   PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
> +   PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
> +   PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
> BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
> PUSH_DATA (push, 0);
> PUSH_DATAf(push, 0.0f);
> @@ -931,8 +931,8 @@ nvc0_screen_create(struct nouveau_device *dev)
> PUSH_DATAf(push, 0.0f);
> PUSH_DATAf(push, 0.0f);
> BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
> -   PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
> -   PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
> +   PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
> +   PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
>  
> if (screen->base.drm->version >= 0x01000101) {
>ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, );
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> index c17223a..2bb9b44 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> @@ -184,8 +184,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
>  ms = 1 << ms_mode;
>  BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
>  PUSH_DATA (push, 1024);
> -PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 
> 10));
> -PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 
> 10));
> +PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 
> 10));
> +PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 
> 10));
>  BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
>  PUSH_DATA (push, 256 + 128);
>  for (i = 0; i < ms; i++) {
> @@ -318,8 +318,8 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, 
> unsigned s)
>  
> BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> PUSH_DATA (push, 1024);
> -   PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10));
> -   PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10));
> +   PUSH_DATAh(push, bo->offset + (6 << 16) + (s << 10));
> +   PUSH_DATA (push, bo->offset + (6 << 16) + (s << 10));
> BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
> PUSH_DATA (push, 256);
> PUSH_DATAp(push, >clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
> @@ -479,8 +479,8 @@ nvc0_validate_buffers(struct nvc0_context *nvc0)
> for (s = 0; s < 5; s++) {
>BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
>PUSH_DATA (push, 1024);
> -  

Re: [Mesa-dev] [PATCH 2/4] st/mesa: unify get_variant functions for TCS, TES, GS

2016-01-31 Thread Michael Schellenberger Costa
Hi

Am 30/01/2016 um 16:50 schrieb Marek Olšák:
> From: Marek Olšák 
> 
> ---
>  src/mesa/state_tracker/st_atom_shader.c |  18 +---
>  src/mesa/state_tracker/st_program.c | 164 
> +---
>  src/mesa/state_tracker/st_program.h |  17 +---
>  3 files changed, 28 insertions(+), 171 deletions(-)
> 
> diff --git a/src/mesa/state_tracker/st_atom_shader.c 
> b/src/mesa/state_tracker/st_atom_shader.c
> index 2d8a3c3..23b7abf 100644
> --- a/src/mesa/state_tracker/st_atom_shader.c
> +++ b/src/mesa/state_tracker/st_atom_shader.c
> @@ -163,7 +163,6 @@ static void
>  update_gp( struct st_context *st )
>  {
> struct st_geometry_program *stgp;
> -   struct st_basic_variant_key key;
>  
> if (!st->ctx->GeometryProgram._Current) {
>cso_set_geometry_shader_handle(st->cso_context, NULL);
> @@ -173,10 +172,7 @@ update_gp( struct st_context *st )
> stgp = st_geometry_program(st->ctx->GeometryProgram._Current);
> assert(stgp->Base.Base.Target == GL_GEOMETRY_PROGRAM_NV);
>  
> -   memset(, 0, sizeof(key));
> -   key.st = st->has_shareable_shaders ? NULL : st;
> -
> -   st->gp_variant = st_get_gp_variant(st, stgp, );
> +   st->gp_variant = st_get_basic_variant(st, >tgsi, >variants);
>  
> st_reference_geomprog(st, >gp, stgp);
>  
> @@ -199,7 +195,6 @@ static void
>  update_tcp( struct st_context *st )
>  {
> struct st_tessctrl_program *sttcp;
> -   struct st_basic_variant_key key;
>  
> if (!st->ctx->TessCtrlProgram._Current) {
>cso_set_tessctrl_shader_handle(st->cso_context, NULL);
> @@ -209,10 +204,7 @@ update_tcp( struct st_context *st )
> sttcp = st_tessctrl_program(st->ctx->TessCtrlProgram._Current);
> assert(sttcp->Base.Base.Target == GL_TESS_CONTROL_PROGRAM_NV);
>  
> -   memset(, 0, sizeof(key));
> -   key.st = st->has_shareable_shaders ? NULL : st;
> -
> -   st->tcp_variant = st_get_tcp_variant(st, sttcp, );
> +   st->tcp_variant = st_get_basic_variant(st, >tgsi, 
> >variants);
>  
> st_reference_tesscprog(st, >tcp, sttcp);
>  
> @@ -235,7 +227,6 @@ static void
>  update_tep( struct st_context *st )
>  {
> struct st_tesseval_program *sttep;
> -   struct st_basic_variant_key key;
>  
> if (!st->ctx->TessEvalProgram._Current) {
>cso_set_tesseval_shader_handle(st->cso_context, NULL);
> @@ -245,10 +236,7 @@ update_tep( struct st_context *st )
> sttep = st_tesseval_program(st->ctx->TessEvalProgram._Current);
> assert(sttep->Base.Base.Target == GL_TESS_EVALUATION_PROGRAM_NV);
>  
> -   memset(, 0, sizeof(key));
> -   key.st = st->has_shareable_shaders ? NULL : st;
> -
> -   st->tep_variant = st_get_tep_variant(st, sttep, );
> +   st->tep_variant = st_get_basic_variant(st, >tgsi, 
> >variants);
>  
> st_reference_tesseprog(st, >tep, sttep);
>  
> diff --git a/src/mesa/state_tracker/st_program.c 
> b/src/mesa/state_tracker/st_program.c
> index 133869b..2dfb41e 100644
> --- a/src/mesa/state_tracker/st_program.c
> +++ b/src/mesa/state_tracker/st_program.c
> @@ -1266,49 +1266,41 @@ st_translate_geometry_program(struct st_context *st,
>  }
>  
>  
> -static struct st_basic_variant *
> -st_create_gp_variant(struct st_context *st,
> - struct st_geometry_program *stgp,
> - const struct st_basic_variant_key *key)
> -{
> -   struct pipe_context *pipe = st->pipe;
> -   struct st_basic_variant *gpv;
> -
> -   gpv = CALLOC_STRUCT(st_basic_variant);
> -   if (!gpv)
> -  return NULL;
> -
> -   /* fill in new variant */
> -   gpv->driver_shader = pipe->create_gs_state(pipe, >tgsi);
> -   gpv->key = *key;
> -   return gpv;
> -}
> -
> -
>  /**
>   * Get/create geometry program variant.
>   */

You might want to update that comment.
Michael

>  struct st_basic_variant *
> -st_get_gp_variant(struct st_context *st,
> -  struct st_geometry_program *stgp,
> -  const struct st_basic_variant_key *key)
> +st_get_basic_variant(struct st_context *st,
> + struct pipe_shader_state *tgsi,
> + struct st_basic_variant **variants)
>  {
> +   struct pipe_context *pipe = st->pipe;
> struct st_basic_variant *gpv;
> +   struct st_basic_variant_key key;
> +
> +   memset(, 0, sizeof(key));
> +   key.st = st->has_shareable_shaders ? NULL : st;
>  
> /* Search for existing variant */
> -   for (gpv = stgp->variants; gpv; gpv = gpv->next) {
> -  if (memcmp(>key, key, sizeof(*key)) == 0) {
> +   for (gpv = *variants; gpv; gpv = gpv->next) {
> +  if (memcmp(>key, , sizeof(key)) == 0) {
>   break;
>}
> }
>  
> if (!gpv) {
>/* create new */
> -  gpv = st_create_gp_variant(st, stgp, key);
> +  struct st_basic_variant *gpv;
> +
> +  gpv = CALLOC_STRUCT(st_basic_variant);
>if (gpv) {
> + /* fill in new variant */
> + gpv->driver_shader = pipe->create_gs_state(pipe, tgsi);
> + gpv->key = key;
> +
>   /* insert into list */
> -

Re: [Mesa-dev] [PATCH 2/2] i965: Implement nir_op_fquantize2f16

2016-01-13 Thread Michael Schellenberger Costa
Hi Jason

Am 13/01/2016 um 00:35 schrieb Jason Ekstrand:
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 13 +
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 ++
>  2 files changed, 23 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 6213378..ffb8059 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -943,6 +943,19 @@ fs_visitor::nir_emit_alu(const fs_builder , 
> nir_alu_instr *instr)
>inst->saturate = instr->dest.saturate;
>break;
>  
> +   case nir_op_fquantize2f16: {
> +  fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D);
> +
> +  /* The destination stride must be at least as big as the source 
> stride. */
> +  tmp.type = BRW_REGISTER_TYPE_W;
> +  tmp.stride = 2;
After a comment like 'at least at big' one would normaly expect some
check to ensure that. Maybe add a "So set it to 2" or whatever

-Michael
> +
> +  bld.emit(BRW_OPCODE_F32TO16, tmp, op[0]);
> +  inst = bld.emit(BRW_OPCODE_F16TO32, result, tmp);
> +  inst->saturate = instr->dest.saturate;
> +  break;
> +   }
> +
> case nir_op_fmin:
> case nir_op_imin:
> case nir_op_umin:
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index 37f517d..77a2f8b 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -1177,6 +1177,16 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>inst->saturate = instr->dest.saturate;
>break;
>  
> +   case nir_op_fquantize2f16: {
> +  /* See also vec4_visitor::emit_pack_half_2x16() */
> +  src_reg tmp = src_reg(this, glsl_type::uvec4_type);
> +
> +  emit(F32TO16(dst_reg(tmp), op[0]));
> +  inst = emit(F16TO32(dst, tmp));
> +  inst->saturate = instr->dest.saturate;
> +  break;
> +   }
> +
> case nir_op_fmin:
> case nir_op_imin:
> case nir_op_umin:
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] st/glsl_to_tgsi: store if dst is double in array

2015-12-19 Thread Michael Schellenberger Costa


Am 19/12/2015 um 05:43 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> This is just a precursor patch to a fix for doubles with
> tessellation that I've written.
> 
> We need to descend into output arrays in that case and
> mark dst's as double.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 13 ++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 89ad6cd..4cad237 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -555,6 +555,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
> unsigned op,
>  {
> glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
> int num_reladdr = 0, i, j;
> +   bool dst_is_double[2];
>  
> op = get_opcode(ir, op, dst, src0, src1);
>  
> @@ -658,7 +659,13 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
> unsigned op,
>  * GLSL [0].z -> TGSI [1].xy
>  * GLSL [0].w -> TGSI [1].zw
>  */
> -   if (inst->dst[0].type == GLSL_TYPE_DOUBLE || inst->dst[1].type == 
> GLSL_TYPE_DOUBLE ||
> +   for (j = 0; j < 2; j++) {
> +  dst_is_double[j] = false;
Could you either use an initializer above or an else clause here?
-Michael
> +  if (inst->dst[j].type == GLSL_TYPE_DOUBLE)
> + dst_is_double[j] = true;
> +   }
> +
> +   if (dst_is_double[0] || dst_is_double[1] ||
> inst->src[0].type == GLSL_TYPE_DOUBLE) {
>glsl_to_tgsi_instruction *dinst = NULL;
>int initial_src_swz[4], initial_src_idx[4];
> @@ -699,7 +706,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
> unsigned op,
>  
>   /* modify the destination if we are splitting */
>   for (j = 0; j < 2; j++) {
> -if (dinst->dst[j].type == GLSL_TYPE_DOUBLE) {
> +if (dst_is_double[j]) {
> dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : 
> WRITEMASK_XY;
> dinst->dst[j].index = initial_dst_idx[j];
> if (i > 1)
> @@ -732,7 +739,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, 
> unsigned op,
>- F2D is a float src0, DLDEXP is integer src1 */
> if (op == TGSI_OPCODE_F2D ||
> op == TGSI_OPCODE_DLDEXP ||
> -   (op == TGSI_OPCODE_UCMP && dinst->dst[0].type == 
> GLSL_TYPE_DOUBLE)) {
> +   (op == TGSI_OPCODE_UCMP && dst_is_double[0])) {
>dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
> }
>  }
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/15] i965/fs: Stop relying on param_size in assign_constant_locations

2015-12-10 Thread Michael Schellenberger Costa
Hi,

Am 10.12.2015 um 05:23 schrieb Jason Ekstrand:
> Now that we have MOV_INDIRECT opcodes, we have all of the size information
> we need directly in the opcode.  With a little restructuring of the
> algorithm used in assign_constant_locations we don't need param_size
> anymore.  The big thing to watch out for now, however, is that you can have
> two ranges overlap where neither contains the other.  In order to deal with
> this, we make the first pass just flag what needs pulling and handle
> assigning pull constant locations until later.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 44 
> ++--
>  1 file changed, 17 insertions(+), 27 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 786c5fb..1add656 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1920,14 +1920,12 @@ fs_visitor::assign_constant_locations()
> if (dispatch_width != 8)
>return;
>  
> -   unsigned int num_pull_constants = 0;
> -
> -   pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
> -   memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms);
> -
> bool is_live[uniforms];
> memset(is_live, 0, sizeof(is_live));
>  
> +   bool needs_pull[uniforms];
> +   memset(needs_pull, 0, sizeof(is_live));
While it is valid, could you make this sizeof(needs_pull) to be safe?
Michael

> +
> /* First, we walk through the instructions and do two things:
>  *
>  *  1) Figure out which uniforms are live.
> @@ -1943,20 +1941,15 @@ fs_visitor::assign_constant_locations()
>   if (inst->src[i].file != UNIFORM)
>  continue;
>  
> - if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
> -int uniform = inst->src[0].nr;
> + int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
>  
> -/* If this array isn't already present in the pull constant 
> buffer,
> - * add it.
> - */
> -if (pull_constant_loc[uniform] == -1) {
> -   assert(param_size[uniform]);
> -   for (int j = 0; j < param_size[uniform]; j++)
> -  pull_constant_loc[uniform + j] = num_pull_constants++;
> + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
> +for (unsigned j = 0; j < inst->src[2].ud / 4; j++) {
> +   is_live[constant_nr + j] = true;
> +   needs_pull[constant_nr + j] = true;
>  }
>   } else {
>  /* Mark the the one accessed uniform as live */
> -int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
>  if (constant_nr >= 0 && constant_nr < (int) uniforms)
> is_live[constant_nr] = true;
>   }
> @@ -1973,26 +1966,23 @@ fs_visitor::assign_constant_locations()
>  */
> unsigned int max_push_components = 16 * 8;
> unsigned int num_push_constants = 0;
> +   unsigned int num_pull_constants = 0;
>  
> push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
> +   pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
>  
> for (unsigned int i = 0; i < uniforms; i++) {
> -  if (!is_live[i] || pull_constant_loc[i] != -1) {
> - /* This UNIFORM register is either dead, or has already been demoted
> -  * to a pull const.  Mark it as no longer living in the param[] 
> array.
> -  */
> - push_constant_loc[i] = -1;
> +  push_constant_loc[i] = -1;
> +  pull_constant_loc[i] = -1;
> +
> +  if (!is_live[i])
>   continue;
> -  }
>  
> -  if (num_push_constants < max_push_components) {
> - /* Retain as a push constant.  Record the location in the params[]
> -  * array.
> -  */
> +  if (!needs_pull[i] && num_push_constants < max_push_components) {
> + /* Retain as a push constant */
>   push_constant_loc[i] = num_push_constants++;
>} else {
> - /* Demote to a pull constant. */
> - push_constant_loc[i] = -1;
> + /* We have to pull it */
>   pull_constant_loc[i] = num_pull_constants++;
>}
> }
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/53] r600: move to using hw stages array for hw stage atoms

2015-11-30 Thread Michael Schellenberger Costa
Hi,

Am 30.11.2015 um 07:20 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> This moves to using an array of hw stages for the atoms.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/evergreen_state.c   |  8 +++-
>  src/gallium/drivers/r600/r600_hw_context.c   |  8 
>  src/gallium/drivers/r600/r600_pipe.h |  7 ++-
>  src/gallium/drivers/r600/r600_state.c|  8 +++-
>  src/gallium/drivers/r600/r600_state_common.c | 25 -
>  5 files changed, 24 insertions(+), 32 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 5333761..fd4c8b5 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -3496,7 +3496,7 @@ fallback:
>  void evergreen_init_state_functions(struct r600_context *rctx)
>  {
>   unsigned id = 1;
> -
> + unsigned i;
>   /* !!!
>*  To avoid GPU lockup registers must be emited in a specific order
>* (no kidding ...). The order below is important and have been
> @@ -3555,10 +3555,8 @@ void evergreen_init_state_functions(struct 
> r600_context *rctx)
>   r600_add_atom(rctx, >b.render_cond_atom, id++);
>   r600_add_atom(rctx, >b.streamout.begin_atom, id++);
>   r600_add_atom(rctx, >b.streamout.enable_atom, id++);
> - r600_init_atom(rctx, >vertex_shader.atom, id++, r600_emit_shader, 
> 23);
As this is effectively changed to
r600_init_atom(rctx, >vertex_shader.atom, id++, r600_emit_shader, 0);

Can you add something in the comment why this change is needed.

All the Best
Michael
> - r600_init_atom(rctx, >pixel_shader.atom, id++, r600_emit_shader, 
> 0);
> - r600_init_atom(rctx, >geometry_shader.atom, id++, 
> r600_emit_shader, 0);
> - r600_init_atom(rctx, >export_shader.atom, id++, r600_emit_shader, 
> 0);
> + for (i = 0; i < EG_NUM_HW_STAGES; i++)
> + r600_init_atom(rctx, >hw_shader_stages[i].atom, id++, 
> r600_emit_shader, 0);
>   r600_init_atom(rctx, >shader_stages.atom, id++, 
> evergreen_emit_shader_stages, 6);
>   r600_init_atom(rctx, >gs_rings.atom, id++, 
> evergreen_emit_gs_rings, 26);
>  
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
> b/src/gallium/drivers/r600/r600_hw_context.c
> index 6409f0b..13b6918 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -300,7 +300,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
>   r600_mark_atom_dirty(ctx, >db_misc_state.atom);
>   r600_mark_atom_dirty(ctx, >db_state.atom);
>   r600_mark_atom_dirty(ctx, >framebuffer.atom);
> - r600_mark_atom_dirty(ctx, >pixel_shader.atom);
> + r600_mark_atom_dirty(ctx, 
> >hw_shader_stages[R600_HW_STAGE_PS].atom);
>   r600_mark_atom_dirty(ctx, >poly_offset_state.atom);
>   r600_mark_atom_dirty(ctx, >vgt_state.atom);
>   r600_mark_atom_dirty(ctx, >sample_mask.atom);
> @@ -315,13 +315,13 @@ void r600_begin_new_cs(struct r600_context *ctx)
>   }
>   r600_mark_atom_dirty(ctx, >stencil_ref.atom);
>   r600_mark_atom_dirty(ctx, >vertex_fetch_shader.atom);
> - r600_mark_atom_dirty(ctx, >export_shader.atom);
> + r600_mark_atom_dirty(ctx, 
> >hw_shader_stages[R600_HW_STAGE_ES].atom);
>   r600_mark_atom_dirty(ctx, >shader_stages.atom);
>   if (ctx->gs_shader) {
> - r600_mark_atom_dirty(ctx, >geometry_shader.atom);
> + r600_mark_atom_dirty(ctx, 
> >hw_shader_stages[R600_HW_STAGE_GS].atom);
>   r600_mark_atom_dirty(ctx, >gs_rings.atom);
>   }
> - r600_mark_atom_dirty(ctx, >vertex_shader.atom);
> + r600_mark_atom_dirty(ctx, 
> >hw_shader_stages[R600_HW_STAGE_VS].atom);
>   r600_mark_atom_dirty(ctx, >b.streamout.enable_atom);
>   r600_mark_atom_dirty(ctx, >b.render_cond_atom);
>  
> diff --git a/src/gallium/drivers/r600/r600_pipe.h 
> b/src/gallium/drivers/r600/r600_pipe.h
> index 0e57efe..0ca4052 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -38,7 +38,7 @@
>  
>  #include "tgsi/tgsi_scan.h"
>  
> -#define R600_NUM_ATOMS 43
> +#define R600_NUM_ATOMS 45
>  
>  #define R600_MAX_VIEWPORTS 16
>  
> @@ -481,10 +481,7 @@ struct r600_context {
>   struct r600_viewport_state  viewport;
>   /* Shaders and shader resources. */
>   struct r600_cso_state   vertex_fetch_shader;
> - struct r600_shader_statevertex_shader;
> - struct r600_shader_statepixel_shader;
> - struct r600_shader_stategeometry_shader;
> - struct r600_shader_stateexport_shader;
> + struct r600_shader_statehw_shader_stages[EG_NUM_HW_STAGES];
>   struct r600_cs_shader_state cs_shader_state;
>   struct r600_shader_stages_state shader_stages;
>   struct r600_gs_rings_state  gs_rings;
> diff --git 

Re: [Mesa-dev] [PATCH 03/17] mesa: Move gl_frag_depth_layout from mtypes.h to shader_enums.h

2015-10-12 Thread Michael Schellenberger Costa
Hi,

as this conflicts directly with Robs patch
http://lists.freedesktop.org/archives/mesa-dev/2015-October/096670.html
you might want to update it

Michael

Am 09.10.2015 um 02:22 schrieb Jason Ekstrand:
> --- src/glsl/shader_enums.h | 17 + 
> src/mesa/main/mtypes.h  | 18 -- 2 files changed, 17
> insertions(+), 18 deletions(-)
> 
> diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h 
> index 2a5d2c5..1c2f5dc 100644 --- a/src/glsl/shader_enums.h +++
> b/src/glsl/shader_enums.h @@ -473,4 +473,21 @@ typedef enum
> 
> const char * gl_frag_result_name(gl_frag_result result);
> 
> +/** + * \brief Layout qualifiers for gl_FragDepth. + * + *
> Extension AMD_conservative_depth allows gl_FragDepth to be
> redeclared with + * a layout qualifier. + * + * \see enum
> ir_depth_layout + */ +enum gl_frag_depth_layout +{ +
> FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */ +
> FRAG_DEPTH_LAYOUT_ANY, +   FRAG_DEPTH_LAYOUT_GREATER, +
> FRAG_DEPTH_LAYOUT_LESS, +   FRAG_DEPTH_LAYOUT_UNCHANGED +}; + 
> #endif /* SHADER_ENUMS_H */ diff --git a/src/mesa/main/mtypes.h
> b/src/mesa/main/mtypes.h index 288d757..6a2f323 100644 ---
> a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1866,24
> +1866,6 @@ typedef enum
> 
> 
> /** - * \brief Layout qualifiers for gl_FragDepth. - * - *
> Extension AMD_conservative_depth allows gl_FragDepth to be
> redeclared with - * a layout qualifier. - * - * \see enum
> ir_depth_layout - */ -enum gl_frag_depth_layout -{ -
> FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */ -
> FRAG_DEPTH_LAYOUT_ANY, -   FRAG_DEPTH_LAYOUT_GREATER, -
> FRAG_DEPTH_LAYOUT_LESS, -   FRAG_DEPTH_LAYOUT_UNCHANGED -}; - - 
> -/** * Base class for any kind of program object */ struct
> gl_program
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Correctly handle GL_BGRA_EXT in ES3 format_and_type checks

2015-10-08 Thread Michael Schellenberger Costa
Hi,
Am 08.10.2015 um 00:58 schrieb Jason Ekstrand:
> The EXT_texture_format_BGRA extension (which mesa supports 
> unconditionally) adds a new format and internal format called 
> GL_BGRA_EXT. Previously, this was not really handled at all in 
> _mesa_ex3_error_check_format_and_type.  When the checks were 
> tightened in commit f15a7f3c, we accidentally tightened things too 
> far and GL_BGRA_EXT would always cause an error to be thrown.
> 
> There were two primary issues here.  First, is that 
> _mesa_es3_effective_internal_format_for_format_and_type didn't 
> handle the GL_BGRA_EXT format.  Second is that it blindly uses 
> _mesa_base_tex_format which returns GL_RGBA for GL_BGRA_EXT.  This 
> commit fixes both of these issues as well as adds explicit checks 
> that GL_BGRA_EXT is only ever used with GL_BGRA_EXT and 
> GL_UNSIGNED_BYTE.
> 
> Signed-off-by: Jason Ekstrand  Bugzilla: 
> https://bugs.freedesktop.org/show_bug.cgi?id=92265 Cc: Eduardo
> Lima Mitev  --- src/mesa/main/glformats.c | 21 
> +++-- 1 file changed, 19 insertions(+), 2 
> deletions(-)
> 
> diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
>  index 7dab33c..843ec02 100644 --- a/src/mesa/main/glformats.c +++ 
> b/src/mesa/main/glformats.c @@ -2678,6 +2678,7 @@ 
> _mesa_es3_effective_internal_format_for_format_and_type(GLenum 
> format, * internal formats, they do not correspond to GL
> constants, so the base * format is returned instead. */ +
> case GL_BGRA_EXT: case GL_LUMINANCE_ALPHA: case GL_LUMINANCE: case 
> GL_ALPHA: @@ -2797,8 +2798,19 @@ 
> _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
> if (effectiveInternalFormat == GL_NONE) return 
> GL_INVALID_OPERATION;
> 
> -  GLenum baseInternalFormat = - _mesa_base_tex_format(ctx,
> effectiveInternalFormat); +  GLenum baseInternalFormat; +
> if (internalFormat == GL_BGRA_EXT) { + /* Unfortunately,
> _mesa_base_tex_format returns a base format of + * GL_RGBA for
> GL_BGRA_EXT.  This makes perfect sense if you're + * asking the
> question, "what channels doe this format have?" + * However, if
> we're trying to determine if two internal formats + * match in the
> ES3 sense, we actually want GL_BGRA.
Shouldnt this be GL_BGRA_EXT at the end of the comment?

Michael
> +  */ + baseInternalFormat = GL_BGRA_EXT; +  } 
> else { + baseInternalFormat = + _mesa_base_tex_format(ctx,
> effectiveInternalFormat); +  }
> 
> if (internalFormat != baseInternalFormat) return 
> GL_INVALID_OPERATION; @@ -2807,6 +2819,11 @@ 
> _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
> }
> 
> switch (format) { +   case GL_BGRA_EXT: +  if (type != 
> GL_UNSIGNED_BYTE || internalFormat != GL_BGRA) + return 
> GL_INVALID_OPERATION; +  break; + case GL_RGBA: switch (type) {
> case GL_UNSIGNED_BYTE:
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/miptree: Rename intel_miptree_map::mt -> ::linear_mt (v2)

2015-09-26 Thread Michael Schellenberger Costa
Hi,
Am 25/09/2015 um 23:24 schrieb Chad Versace:
> Because that's what it is. It's an untiled, *linear* miptree.
> 
> v2: - Add space after /*. - Use one comment per function argument. 
> --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 28
> ++- 
> src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  2 +- 2 files
> changed, 16 insertions(+), 14 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index
> 1259664..067276c 100644 ---
> a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2140,16
> +2140,18 @@ intel_miptree_map_blit(struct brw_context *brw, struct
> intel_miptree_map *map, unsigned int level, unsigned int slice) { -
> map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format, -
> 0, 0, -  map->w, map->h, 1, -
> 0, MIPTREE_LAYOUT_TILING_NONE); +   map->linear_mt =
> intel_miptree_create(brw, GL_TEXTURE_2D, mt->format, +
> /* first_level */ 0, + /*
> last_level */ 0,
I am not sure about mesa, but the logical ordering is:
ARG, /* Comment */

Best wishes
Michael
> + map->w, map->h, 1, +
> /* samples */ 0, +
> MIPTREE_LAYOUT_TILING_NONE);
> 
> -   if (!map->mt) { +   if (!map->linear_mt) { fprintf(stderr,
> "Failed to allocate blit temporary\n"); goto fail; } -
> map->stride = map->mt->pitch; +   map->stride =
> map->linear_mt->pitch;
> 
> /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT
> implies no * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original
> values read in unless @@ -2160,7 +2162,7 @@
> intel_miptree_map_blit(struct brw_context *brw, if
> (!intel_miptree_blit(brw, mt, level, slice, map->x, map->y, false, 
> -  map->mt, 0, 0, +
> map->linear_mt, 0, 0, 0, 0, false, map->w, map->h, GL_COPY)) { 
> fprintf(stderr, "Failed to blit\n"); @@ -2168,7 +2170,7 @@
> intel_miptree_map_blit(struct brw_context *brw, } }
> 
> -   map->ptr = intel_miptree_map_raw(brw, map->mt); +   map->ptr =
> intel_miptree_map_raw(brw, map->linear_mt);
> 
> DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, 
> map->x, map->y, map->w, map->h, @@ -2178,7 +2180,7 @@
> intel_miptree_map_blit(struct brw_context *brw, return;
> 
> fail: -   intel_miptree_release(>mt); +
> intel_miptree_release(>linear_mt); map->ptr = NULL; 
> map->stride = 0; } @@ -2192,11 +2194,11 @@
> intel_miptree_unmap_blit(struct brw_context *brw, { struct
> gl_context *ctx = >ctx;
> 
> -   intel_miptree_unmap_raw(brw, map->mt); +
> intel_miptree_unmap_raw(brw, map->linear_mt);
> 
> if (map->mode & GL_MAP_WRITE_BIT) { bool ok =
> intel_miptree_blit(brw, -
> map->mt, 0, 0, +   map->linear_mt,
> 0, 0, 0, 0, false, mt, level, slice, map->x, map->y, false, @@
> -2204,7 +2206,7 @@ intel_miptree_unmap_blit(struct brw_context
> *brw, WARN_ONCE(!ok, "Failed to blit from linear temporary
> mapping"); }
> 
> -   intel_miptree_release(>mt); +
> intel_miptree_release(>linear_mt); }
> 
> /** @@ -2728,7 +2730,7 @@ intel_miptree_unmap(struct brw_context
> *brw, intel_miptree_unmap_etc(brw, mt, map, level, slice); } else
> if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) { 
> intel_miptree_unmap_depthstencil(brw, mt, map, level, slice); -   }
> else if (map->mt) { +   } else if (map->linear_mt) { 
> intel_miptree_unmap_blit(brw, mt, map, level, slice); #if
> defined(USE_SSE41) } else if (map->buffer && cpu_has_sse4_1) { diff
> --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index
> 830ff07..9f5397f 100644 ---
> a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -84,7 +84,7 @@
> struct intel_miptree_map { /** Possibly malloced temporary buffer
> for the mapping. */ void *buffer; /** Possible pointer to a
> temporary linear miptree for the mapping. */ -   struct
> intel_mipmap_tree *mt; +   struct intel_mipmap_tree *linear_mt; /**
> Pointer to the start of (map_x, map_y) returned by the mapping. */ 
> void *ptr; /** Stride of the mapping. */
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Fix output variable names

2015-09-11 Thread Michael Schellenberger Costa
Hi,

didnt Jason just send a similar patch to the list?

[Mesa-dev] [PATCH] nir/lower_outputs_to_temporaries: Reparent the output
name

Regards
Michael

Am 11/09/2015 um 09:24 schrieb Eduardo Lima Mitev:
> Commit 1dbe4af9c9e318525fc082b542b93fb7f1e5efba
> "nir: Add a pass to lower outputs to temporary variables" messed up output
> variable names. The issue can be reproduced by dumping the NIR shaders
> with INTEL_DEBUG="vs,fs".
> ---
>  src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c 
> b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> index b730cad..e9c4c0d 100644
> --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> +++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
> @@ -87,12 +87,13 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
> foreach_list_typed(nir_variable, var, node, _outputs) {
>nir_variable *output = ralloc(shader, nir_variable);
>memcpy(output, var, sizeof *output);
> +  output->name = ralloc_strdup(output, var->name);
>  
>/* The orignal is now the temporary */
>nir_variable *temp = var;
>  
>/* Give the output a new name with @out-temp appended */
> -  temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
> +  temp->name = ralloc_asprintf(output, "%s@out-temp", output->name);
>temp->data.mode = nir_var_global;
>temp->constant_initializer = NULL;
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/11] nir: Add a function for rewriting instruction destinations

2015-09-10 Thread Michael Schellenberger Costa
Hi,
Am 10/09/2015 um 02:50 schrieb Jason Ekstrand:
> ---
>  src/glsl/nir/nir.c | 24 
>  src/glsl/nir/nir.h |  2 ++
>  2 files changed, 26 insertions(+)
> 
> diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
> index e173b21..0090b08 100644
> --- a/src/glsl/nir/nir.c
> +++ b/src/glsl/nir/nir.c
> @@ -1175,6 +1175,30 @@ nir_if_rewrite_condition(nir_if *if_stmt, nir_src 
> new_src)
>  }
>  
>  void
> +nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
> +{
> +   if (dest->is_ssa) {
> +  /* We can only overwrite an SSA destination if it has no uses. */
> +  assert(list_empty(>ssa.uses) && list_empty(>ssa.if_uses));
> +   } else {
> +  list_del(>reg.def_link);
> +  if (dest->reg.indirect)
> + src_remove_all_uses(dest->reg.indirect);
> +   }
> +
> +   /* We can't re-write with an SSA def */
> +   assert(!new_dest.is_ssa);
Wouldn't it make more sense to check that assert first, as the if clause
above may include some work

Michael
> +
> +   nir_dest_copy(dest, _dest, instr);
> +
> +   dest->reg.parent_instr = instr;
> +   list_addtail(>reg.def_link, _dest.reg.reg->defs);
> +
> +   if (dest->reg.indirect)
> +  src_add_all_uses(dest->reg.indirect, instr, NULL);
> +}
> +
> +void
>  nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
>   unsigned num_components, const char *name)
>  {
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 32c338c..a8e7363 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1725,6 +1725,8 @@ bool nir_srcs_equal(nir_src src1, nir_src src2);
>  void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
>  void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
>  void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
> +void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
> +nir_dest new_dest);
>  
>  void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
> unsigned num_components, const char *name);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/5] nir: Add new GS intrinsics that maintain a count of emitted vertices.

2015-09-04 Thread Michael Schellenberger Costa
Am 03.09.2015 um 10:48 schrieb Kenneth Graunke:
> This patch also introduces a lowering pass to convert the simple GS
> intrinsics to the new ones.  See the comments above that for the
> rationale behind the new intrinsics.
> 
> This should be useful for i965; it's a generic enough mechanism that I
> could see other drivers potentially using it as well, so I don't feel
> too bad about putting it in the generic code.
> 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/glsl/Makefile.sources  |   1 +
>  src/glsl/nir/nir.h |   2 +
>  src/glsl/nir/nir_intrinsics.h  |  21 
>  src/glsl/nir/nir_lower_gs_intrinsics.c | 214 
> +
>  4 files changed, 238 insertions(+)
>  create mode 100644 src/glsl/nir/nir_lower_gs_intrinsics.c
> 
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index c422303..ae2e3e1 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -36,6 +36,7 @@ NIR_FILES = \
>   nir/nir_lower_alu_to_scalar.c \
>   nir/nir_lower_atomics.c \
>   nir/nir_lower_global_vars_to_local.c \
> + nir/nir_lower_gs_intrinsics.c \
>   nir/nir_lower_load_const_to_scalar.c \
>   nir/nir_lower_locals_to_regs.c \
>   nir/nir_lower_idiv.c \
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 570a33c..13f123f 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1809,6 +1809,8 @@ void nir_lower_idiv(nir_shader *shader);
>  void nir_lower_atomics(nir_shader *shader);
>  void nir_lower_to_source_mods(nir_shader *shader);
>  
> +void nir_lower_gs_intrinsics(nir_shader *shader);
> +
>  void nir_normalize_cubemap_coords(nir_shader *shader);
>  
>  void nir_live_variables_impl(nir_function_impl *impl);
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
> index ed309b6..bcfe70d 100644
> --- a/src/glsl/nir/nir_intrinsics.h
> +++ b/src/glsl/nir/nir_intrinsics.h
> @@ -79,9 +79,30 @@ BARRIER(memory_barrier)
>  /** A conditional discard, with a single boolean source. */
>  INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
>  
> +/**
> + * Basic Geometry Shader intrinsics.
> + *
> + * emit_vertex implements GLSL's EmitStreamVertex() built-in.  It takes a 
> single
> + * index, which is the stream ID to write to.
> + *
> + * end_primitive implements GLSL's EndPrimitive() built-in.
> + */
>  INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, 0)
>  INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
>  
> +/**
> + * Geometry Shader intrinsics with a vertex count.
> + *
> + * Alternatively, drivers may implement these intrinsics, and use
> + * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
> + *
> + * These maintain a count of the number of vertices emitted, as an additional
> + * unsigned integer source.
> + */
> +INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
> +INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
> +INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
> +
>  /*
>   * Atomic counters
>   *
> diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c 
> b/src/glsl/nir/nir_lower_gs_intrinsics.c
> new file mode 100644
> index 000..b866d87
> --- /dev/null
> +++ b/src/glsl/nir/nir_lower_gs_intrinsics.c
> @@ -0,0 +1,214 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *Kenneth Graunke 
> + */
In the light of https://patchwork.freedesktop.org/patch/57683/ this
might challenge Ians pyromancer skills

> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +
> +/**
> + * \file nir_lower_gs_intrinsics.c
> + *
> + * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an
> + * arbitrary number of vertices.  However, the shader must declare the 
> 

Re: [Mesa-dev] [PATCH 07/20] i965/fs: Import image memory offset calculation code.

2015-07-24 Thread Michael Schellenberger Costa
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Hi,
Am 21.07.2015 um 18:38 schrieb Francisco Jerez:
 Define a function to calculate the memory address of the image 
 location given by a vector of coordinates.  This is required in
 cases where we need to fall back to untyped surface access, which
 take a raw memory offset and know nothing about surface
 coordinates, type conversion or memory tiling and swizzling.  They
 are still useful because typed surface reads don't support any 64
 or 128-bit formats on IVB, and they don't support any 128-bit
 formats on HSW and BDW.
 
 The tiling algorithm is implemented based on a number of
 parameters which are passed in as uniforms and determine whether
 the surface layout is X-tiled, Y-tiled or untiled.  This allows
 binding surfaces of different tiling layouts to the pipeline
 without recompiling the program.
 
 v2: Drop VEC4 suport. v3: Rebase. --- 
 .../drivers/dri/i965/brw_fs_surface_builder.cpp| 108
 + 1 file changed, 108 insertions(+)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
 b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index
 5ee04de..0c879db 100644 ---
 a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++
 b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -215,4
 +215,112 @@ namespace { return BRW_PREDICATE_NORMAL; } } + +
 namespace image_coordinates { +  /** +   * Calculate the
 offset in memory of the texel given by \p coord. +   * +
 * This is meant to be used with untyped surface messages to access
 a +   * tiled surface, what involves taking into account the
 tiling and +   * swizzling modes of the surface manually so it
 will hopefully not +   * happen very often. +   */ +
 fs_reg +  emit_address_calculation(const fs_builder bld, const
 fs_reg image, +   const fs_reg coord,
 unsigned dims) +  { + const brw_device_info *devinfo =
 bld.shader-devinfo; + const fs_reg off = offset(image,
 bld, BRW_IMAGE_PARAM_OFFSET_OFFSET); + const fs_reg stride
 = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET); +
 const fs_reg tile = offset(image, bld,
 BRW_IMAGE_PARAM_TILING_OFFSET); + const fs_reg swz =
 offset(image, bld, BRW_IMAGE_PARAM_SWIZZLING_OFFSET); +
 const fs_reg addr = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); +
 const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); +
 const fs_reg minor = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); +
 const fs_reg major = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); +
 const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD); + + /*
 Shift the coordinates by the fixed surface offset. */ + for
 (unsigned c = 0; c  2; ++c) +bld.ADD(offset(addr, bld,
 c), offset(off, bld, c), +(c  dims ? +
 offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) : +
 fs_reg(0)));

^That loop should have curly braces for readability
.
Regards
Michael
 + + if (dims  2) { +/* Decompose z into a
 major (tmp.y) and a minor (tmp.x) + * index. +
 */ +bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2),
 fs_reg(0), +offset(retype(coord,
 BRW_REGISTER_TYPE_UD), bld, 2)); +bld.SHR(offset(tmp,
 bld, 1), +offset(retype(coord,
 BRW_REGISTER_TYPE_UD), bld, 2), +offset(tile,
 bld, 2)); + +/* Take into account the horizontal
 (tmp.x) and vertical (tmp.y) + * slice offset. +
 */ +for (unsigned c = 0; c  2; ++c) { +
 bld.MUL(offset(tmp, bld, c), +   offset(stride,
 bld, 2 + c), offset(tmp, bld, c)); +
 bld.ADD(offset(addr, bld, c), +   offset(addr,
 bld, c), offset(tmp, bld, c)); +} + } + +
 if (dims  1) { +for (unsigned c = 0; c  2; ++c) { +
 /* Calculate the minor x and y indices. */ +
 bld.BFE(offset(minor, bld, c), offset(tile, bld, c), +
 fs_reg(0), offset(addr, bld, c)); + +   /* Calculate
 the major x and y indices. */ +   bld.SHR(offset(major,
 bld, c), +   offset(addr, bld, c), offset(tile,
 bld, c)); +} + +/* Calculate the texel
 index from the start of the tile row and + * the
 vertical coordinate of the row. + * Equivalent to: +
 *   tmp.x = (major.x  tile.y  tile.x) + + *
 (minor.y  tile.x) + minor.x + *   tmp.y = major.y 
 tile.y + */ +bld.SHL(tmp, major,
 offset(tile, bld, 1)); +bld.ADD(tmp, tmp, offset(minor,
 bld, 1)); +bld.SHL(tmp, tmp, offset(tile, bld, 0)); +
 bld.ADD(tmp, tmp, minor); +bld.SHL(offset(tmp, bld,
 1), +offset(major, bld, 1), offset(tile, bld,
 1)); + +/* Add it to the start of the tile row. */ +
 bld.MUL(offset(tmp, bld, 1), +offset(tmp, bld,
 1), offset(stride, bld, 1)); +bld.ADD(tmp, 

Re: [Mesa-dev] [PATCH 3/7] tgsi: add support for geometry shader streams.

2015-05-27 Thread Michael Schellenberger Costa
Hi,

Am 27/05/2015 um 09:45 schrieb Dave Airlie:
 This adds support to retrieve the primitive counts
 for each stream, along with the offset for each
 primitive into the output array.
 
 It also adds support for parsing the stream argument
 to the emit and end instructions.
 
 Signed-off-by: Dave Airlie airl...@redhat.com
 ---
  src/gallium/auxiliary/tgsi/tgsi_exec.c | 59 
 ++
  src/gallium/auxiliary/tgsi/tgsi_exec.h | 14 ++--
  2 files changed, 58 insertions(+), 15 deletions(-)
 
 diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
 b/src/gallium/auxiliary/tgsi/tgsi_exec.c
 index a098a82..f080385 100644
 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
 +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
 @@ -706,7 +706,22 @@ enum tgsi_exec_datatype {
  #define TEMP_OUTPUT_C  TGSI_EXEC_TEMP_OUTPUT_C
  #define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
  #define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
 -
 +#define TEMP_PRIMITIVE_S1_I   TGSI_EXEC_TEMP_PRIMITIVE_S1_I
 +#define TEMP_PRIMITIVE_S1_C   TGSI_EXEC_TEMP_PRIMITIVE_S1_C
 +#define TEMP_PRIMITIVE_S2_I   TGSI_EXEC_TEMP_PRIMITIVE_S2_I
 +#define TEMP_PRIMITIVE_S2_C   TGSI_EXEC_TEMP_PRIMITIVE_S2_C
 +#define TEMP_PRIMITIVE_S3_I   TGSI_EXEC_TEMP_PRIMITIVE_S3_I
 +#define TEMP_PRIMITIVE_S3_C   TGSI_EXEC_TEMP_PRIMITIVE_S3_C
 +
 +static const struct {
 +   int idx;
 +   int chan;
 +} temp_prim_idxs[] = {
 +   { TEMP_PRIMITIVE_I, TEMP_PRIMITIVE_C },
 +   { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C },
 +   { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C },
 +   { TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C },
 +};

Is that last comma intentional?

Regards
Michael

  
  /** The execution mask depends on the conditional mask and the loop mask */
  #define UPDATE_EXEC_MASK(MACH) \
 @@ -1848,35 +1863,51 @@ exec_kill(struct tgsi_exec_machine *mach,
  }
  
  static void
 -emit_vertex(struct tgsi_exec_machine *mach)
 +emit_vertex(struct tgsi_exec_machine *mach,
 +const struct tgsi_full_instruction *inst)
  {
 +   union tgsi_exec_channel r[1];
 +   unsigned stream_id;
 +   unsigned *prim_count;
 /* FIXME: check for exec mask correctly
 unsigned i;
 for (i = 0; i  TGSI_QUAD_SIZE; ++i) {
   if ((mach-ExecMask  (1  i)))
 */
 +   IFETCH(r[0], 0, TGSI_CHAN_X);
 +   stream_id = r[0].u[0];
 +   prim_count = 
 mach-Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
 if (mach-ExecMask) {
 -  if 
 (mach-Primitives[mach-Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] 
 = mach-MaxOutputVertices)
 +  if (mach-Primitives[stream_id][*prim_count] = 
 mach-MaxOutputVertices)
   return;
  
 +  mach-PrimitiveOffsets[stream_id][*prim_count] = 
 mach-Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0];
mach-Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 
 mach-NumOutputs;
 -  
 mach-Primitives[mach-Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
 +  mach-Primitives[stream_id][*prim_count]++;
 }
  }
  
  static void
 -emit_primitive(struct tgsi_exec_machine *mach)
 +emit_primitive(struct tgsi_exec_machine *mach,
 +   const struct tgsi_full_instruction *inst)
  {
 -   unsigned *prim_count = 
 mach-Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
 +   unsigned *prim_count;
 +   union tgsi_exec_channel r[1];
 +   unsigned stream_id = 0;
 /* FIXME: check for exec mask correctly
 unsigned i;
 for (i = 0; i  TGSI_QUAD_SIZE; ++i) {
   if ((mach-ExecMask  (1  i)))
 */
 +   if (inst) {
 +  IFETCH(r[0], 0, TGSI_CHAN_X);
 +  stream_id = r[0].u[0];
 +   }
 +   prim_count = 
 mach-Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
 if (mach-ExecMask) {
++(*prim_count);
debug_assert((*prim_count * mach-NumOutputs)  
 mach-MaxGeometryShaderOutputs);
 -  mach-Primitives[*prim_count] = 0;
 +  mach-Primitives[stream_id][*prim_count] = 0;
 }
  }
  
 @@ -1885,9 +1916,9 @@ conditional_emit_primitive(struct tgsi_exec_machine 
 *mach)
  {
 if (TGSI_PROCESSOR_GEOMETRY == mach-Processor) {
int emitted_verts =
 - 
 mach-Primitives[mach-Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
 + 
 mach-Primitives[0][mach-Temps[temp_prim_idxs[0].idx].xyzw[temp_prim_idxs[0].chan].u[0]];
if (emitted_verts) {
 - emit_primitive(mach);
 + emit_primitive(mach, NULL);
}
 }
  }
 @@ -4596,11 +4627,11 @@ exec_instruction(
break;
  
 case TGSI_OPCODE_EMIT:
 -  emit_vertex(mach);
 +  emit_vertex(mach, inst);
break;
  
 case TGSI_OPCODE_ENDPRIM:
 -  emit_primitive(mach);
 +  emit_primitive(mach, inst);
break;
  
 case TGSI_OPCODE_BGNLOOP:
 @@ -5061,8 +5092,10 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
 mach-Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
  
 if( mach-Processor == TGSI_PROCESSOR_GEOMETRY ) {
 -  

Re: [Mesa-dev] [PATCH 3/7] tgsi: add support for geometry shader streams.

2015-05-27 Thread Michael Schellenberger Costa
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Hi,

Am 27.05.2015 um 09:45 schrieb Dave Airlie:
 This adds support to retrieve the primitive counts for each stream,
 along with the offset for each primitive into the output array.
 
 It also adds support for parsing the stream argument to the emit
 and end instructions.
 
 Signed-off-by: Dave Airlie airl...@redhat.com --- 
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 59
 ++ 
 src/gallium/auxiliary/tgsi/tgsi_exec.h | 14 ++-- 2 files
 changed, 58 insertions(+), 15 deletions(-)
 
 diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c
 b/src/gallium/auxiliary/tgsi/tgsi_exec.c index a098a82..f080385
 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++
 b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -706,7 +706,22 @@ enum
 tgsi_exec_datatype { #define TEMP_OUTPUT_C
 TGSI_EXEC_TEMP_OUTPUT_C #define TEMP_PRIMITIVE_I
 TGSI_EXEC_TEMP_PRIMITIVE_I #define TEMP_PRIMITIVE_C
 TGSI_EXEC_TEMP_PRIMITIVE_C - +#define TEMP_PRIMITIVE_S1_I
 TGSI_EXEC_TEMP_PRIMITIVE_S1_I +#define TEMP_PRIMITIVE_S1_C
 TGSI_EXEC_TEMP_PRIMITIVE_S1_C +#define TEMP_PRIMITIVE_S2_I
 TGSI_EXEC_TEMP_PRIMITIVE_S2_I +#define TEMP_PRIMITIVE_S2_C
 TGSI_EXEC_TEMP_PRIMITIVE_S2_C +#define TEMP_PRIMITIVE_S3_I
 TGSI_EXEC_TEMP_PRIMITIVE_S3_I +#define TEMP_PRIMITIVE_S3_C
 TGSI_EXEC_TEMP_PRIMITIVE_S3_C + +static const struct { +   int
 idx; +   int chan; +} temp_prim_idxs[] = { +   { TEMP_PRIMITIVE_I,
 TEMP_PRIMITIVE_C }, +   { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C
 }, +   { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C }, +   {
 TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C }, +};

Is the last comma intentional?

Regards
Michael

 
 /** The execution mask depends on the conditional mask and the loop
 mask */ #define UPDATE_EXEC_MASK(MACH) \ @@ -1848,35 +1863,51 @@
 exec_kill(struct tgsi_exec_machine *mach, }
 
 static void -emit_vertex(struct tgsi_exec_machine *mach) 
 +emit_vertex(struct tgsi_exec_machine *mach, +const
 struct tgsi_full_instruction *inst) { +   union tgsi_exec_channel
 r[1]; +   unsigned stream_id; +   unsigned *prim_count; /* FIXME:
 check for exec mask correctly unsigned i; for (i = 0; i 
 TGSI_QUAD_SIZE; ++i) { if ((mach-ExecMask  (1  i))) */ +
 IFETCH(r[0], 0, TGSI_CHAN_X); +   stream_id = r[0].u[0]; +
 prim_count =
 mach-Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream
_id].chan].u[0];

 
if (mach-ExecMask) {
 -  if
 (mach-Primitives[mach-Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C]
.u[0]]
 = mach-MaxOutputVertices) +  if
 (mach-Primitives[stream_id][*prim_count] =
 mach-MaxOutputVertices) return;
 
 +  mach-PrimitiveOffsets[stream_id][*prim_count] =
 mach-Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]; 
 mach-Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] +=
 mach-NumOutputs; -
 mach-Primitives[mach-Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].
u[0]]++;

 
+  mach-Primitives[stream_id][*prim_count]++;
 } }
 
 static void -emit_primitive(struct tgsi_exec_machine *mach) 
 +emit_primitive(struct tgsi_exec_machine *mach, +
 const struct tgsi_full_instruction *inst) { -   unsigned
 *prim_count =
 mach-Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; +
 unsigned *prim_count; +   union tgsi_exec_channel r[1]; +
 unsigned stream_id = 0; /* FIXME: check for exec mask correctly 
 unsigned i; for (i = 0; i  TGSI_QUAD_SIZE; ++i) { if
 ((mach-ExecMask  (1  i))) */ +   if (inst) { +
 IFETCH(r[0], 0, TGSI_CHAN_X); +  stream_id = r[0].u[0]; +   } 
 +   prim_count =
 mach-Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream
_id].chan].u[0];

 
if (mach-ExecMask) {
 ++(*prim_count); debug_assert((*prim_count * mach-NumOutputs) 
 mach-MaxGeometryShaderOutputs); -
 mach-Primitives[*prim_count] = 0; +
 mach-Primitives[stream_id][*prim_count] = 0; } }
 
 @@ -1885,9 +1916,9 @@ conditional_emit_primitive(struct
 tgsi_exec_machine *mach) { if (TGSI_PROCESSOR_GEOMETRY ==
 mach-Processor) { int emitted_verts = -
 mach-Primitives[mach-Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].
u[0]];

 
+
mach-Primitives[0][mach-Temps[temp_prim_idxs[0].idx].xyzw[temp_prim_id
xs[0].chan].u[0]];
 if (emitted_verts) { - emit_primitive(mach); +
 emit_primitive(mach, NULL); } } } @@ -4596,11 +4627,11 @@
 exec_instruction( break;
 
 case TGSI_OPCODE_EMIT: -  emit_vertex(mach); +
 emit_vertex(mach, inst); break;
 
 case TGSI_OPCODE_ENDPRIM: -  emit_primitive(mach); +
 emit_primitive(mach, inst); break;
 
 case TGSI_OPCODE_BGNLOOP: @@ -5061,8 +5092,10 @@
 tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 
 mach-Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
 
 if( mach-Processor == TGSI_PROCESSOR_GEOMETRY ) { -
 mach-Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; -
 mach-Primitives[0] = 0; +  for (i = 0; i 
 TGSI_MAX_VERTEX_STREAMS; i++) { +
 mach-Temps[temp_prim_idxs[i].idx].xyzw[temp_prim_idxs[i].chan].u[0]
 = 0; + mach-Primitives[i][0] = 0; +  } /* GS runs on a
 single primitive for now */ 

Re: [Mesa-dev] [PATCH 2/4] radeon: Make use of _mesa_get_viewport_xform.

2015-03-31 Thread Michael Schellenberger Costa
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Hi,

could you use the same ordering as in the 1965 patch?

scale[0]
scale[1]
scale[2]
translate[0]
translate[1]
translate[2]

This simplifies reading and comparing a lot.

Best wishes
Michael
-BEGIN PGP SIGNATURE-
Version: GnuPG v2.0.22 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQIcBAEBAgAGBQJVGlNvAAoJECEwb8zWbvHC72IQAJByxpnroczS4xjo/GUqfxTp
w4IzlpX7LTiptoViM12bv9aMSPhmQM2NOVjUVcC6I5arrJRdAjVYjOOfTYLE2khI
hhAPqwfa+mZ86GbGxb1lxEeC0AGUudmgGinYSkzUohYPTIzJGt8zhSRcvXM7flEj
UskUPZmn66V6vlF7hVOzQLKDw/9jNrk+9xfw83ZewpS3vbjzbXxKrfYFf1LkPKK2
g2Aq2y55+ndX+FVMuUWqV+OOJL/rsrlxXX1TwAW01jJGoha9U4gsdc/4RJqDITSu
QY5n5WJi+8p8eX/GQVKX54v0lsGnRU33YZU3sx99dxNNI2Ej8legIooDno6Y34lj
dosXKOycvzep1/qMqpZThSVhhQ/Q/owpfYvkQhk4KTZmR+Ck8sTmD4Lk3Id7XtSV
wnWa/BvNauPAlUe2aMs65lsPCS1v8mtatM95jkA6hytaFtMNB2DmozNZqRyxOWl/
QgCCikdVj8ZLKEDpP7cODZMlp9Z+GuX+lAb9YIq6bEbIbiYtQk/2YOZgaB79COrO
L7EeHb8ngSszwfSy2QvYRB359maEM2Jnsfv46W1XLnNf9hmZDGU+9nrnmgL4nyUj
VJlsRI68TqyzGwdTmvmzhxoBo8T887FZYmqcOn6KKvS1ixvM9PbShpvY8zp7mbgr
udVZiZlaIuiSDFtYQ2tu
=XJ6i
-END PGP SIGNATURE-


0xD66EF1C2.asc
Description: application/pgp-keys


0xD66EF1C2.asc.sig
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev