Re: [Mesa-dev] [PATCH v2 1/2] nir: Add nir_lower_viewport_transform

2019-04-08 Thread Thomas Helland
c->impl) {
> + nir_foreach_instr_safe(instr, block) {
> +if (instr->type != nir_instr_type_intrinsic) continue;
> +
> +nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
> +if (intr->intrinsic != nir_intrinsic_store_deref) continue;
> +
> +nir_variable *var = nir_intrinsic_get_var(intr, 0);
> +if (var->data.location != VARYING_SLOT_POS) continue;
> +

I believe the convention in Mesa is that the if ( ... ) should be on one line,
and the continue, return, etc. should be on a new one.

Regards,
Thomas Helland

> +nir_builder b;
> +nir_builder_init(, func->impl);
> +b.cursor = nir_before_instr(instr);
> +
> +/* Grab the source and viewport */
> +nir_ssa_def *input_point = nir_ssa_for_src(, intr->src[1], 4);
> +nir_ssa_def *scale = nir_load_viewport_scale();
> +nir_ssa_def *offset = nir_load_viewport_offset();
> +
> +/* World space to normalised device coordinates to screen space 
> */
> +
> +nir_ssa_def *w_recip = nir_frcp(, nir_channel(, input_point, 
> 3));
> +
> +nir_ssa_def *ndc_point = nir_fmul(,
> +  nir_channels(, input_point, 0x7), w_recip);
> +
> +nir_ssa_def *screen = nir_fadd(,
> +  nir_fmul(, ndc_point, scale), offset);
> +
> +/* gl_Position will be written out in screenspace xyz, with w 
> set to
> + * the reciprocal we computed earlier. The transformed w 
> component is
> + * then used for perspective-correct varying interpolation. The
> + * transformed w component must preserve its original sign; this 
> is
> + * used in depth clipping computations */
> +
> +nir_ssa_def *screen_space = nir_vec4(,
> + nir_channel(, screen, 0),
> + nir_channel(, screen, 1),
> + nir_channel(, screen, 2),
> + w_recip);
> +
> +nir_instr_rewrite_src(instr, >src[1],
> +  nir_src_for_ssa(screen_space));
> + }
> +  }
> +
> +  nir_metadata_preserve(func->impl, nir_metadata_block_index |
> +nir_metadata_dominance);
> +   }
> +}
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: clone instruction set rather than removing individual entries

2019-02-21 Thread Thomas Helland
This patch is:

Reviewed-by: Thomas Helland

Den ons. 20. feb. 2019 kl. 04:04 skrev Timothy Arceri :
>
> This reduces the time spent in nir_opt_cse() by almost a half.
> ---
>  src/compiler/nir/nir_opt_cse.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/compiler/nir/nir_opt_cse.c b/src/compiler/nir/nir_opt_cse.c
> index bf42a6a33dc..3c3617d852a 100644
> --- a/src/compiler/nir/nir_opt_cse.c
> +++ b/src/compiler/nir/nir_opt_cse.c
> @@ -39,9 +39,10 @@
>   */
>
>  static bool
> -cse_block(nir_block *block, struct set *instr_set)
> +cse_block(nir_block *block, struct set *dominance_set)
>  {
> bool progress = false;
> +   struct set *instr_set = _mesa_set_clone(dominance_set, NULL);
>
> nir_foreach_instr_safe(instr, block) {
>if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
> @@ -55,8 +56,7 @@ cse_block(nir_block *block, struct set *instr_set)
>progress |= cse_block(child, instr_set);
> }
>
> -   nir_foreach_instr(instr, block)
> - nir_instr_set_remove(instr_set, instr);
> +   _mesa_set_destroy(instr_set, NULL);
>
> return progress;
>  }
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 06/11] nir: rework force_unroll_array_access()

2018-12-04 Thread Thomas Helland
Den tir. 20. nov. 2018 kl. 09:15 skrev Timothy Arceri :
>
> Here we rework force_unroll_array_access() so that we can reused
> the induction variable detection in a following patch.
> ---
>  src/compiler/nir/nir_loop_analyze.c | 49 -
>  1 file changed, 35 insertions(+), 14 deletions(-)
>
> diff --git a/src/compiler/nir/nir_loop_analyze.c 
> b/src/compiler/nir/nir_loop_analyze.c
> index 700d1fe552..a103a22afd 100644
> --- a/src/compiler/nir/nir_loop_analyze.c
> +++ b/src/compiler/nir/nir_loop_analyze.c
> @@ -350,6 +350,38 @@ find_loop_terminators(loop_info_state *state)
> return success;
>  }
>
> +/* This function looks for an array access within a loop that use an 
> induction
> + * variable for the array index. If found it returns the size of the array,
> + * otherwise 0 is returned.
> + */
> +static unsigned
> +find_array_access_via_induction(loop_info_state *state,
> +nir_deref_instr *deref,
> +nir_loop_variable **array_index_out)

Maybe make a small comment about the last parameter?
I guess it's quite obvious, but I'd rather have too much
handholding than too little =)

> +{
> +   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
> +  if (d->deref_type != nir_deref_type_array)
> + continue;
> +
> +  assert(d->arr.index.is_ssa);
> +  nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);
> +
> +  if (array_index->type != basic_induction)
> + continue;
> +
> +  if (array_index_out)
> + *array_index_out = array_index;
> +
> +  nir_deref_instr *parent = nir_deref_instr_parent(d);
> +  assert(glsl_type_is_array(parent->type) ||
> + glsl_type_is_matrix(parent->type));

Maybe use glsl_type_is_array_or_matrix() ?
With (or without) that, this patch looks good and is:

Reviewed-by: Thomas Helland 

> +
> +  return glsl_get_length(parent->type);
> +   }
> +
> +   return 0;
> +}
> +
>  static int32_t
>  get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value 
> *step,
>nir_const_value *limit)
> @@ -626,20 +658,9 @@ find_trip_count(loop_info_state *state)
>  static bool
>  force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
>  {
> -   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
> -  if (d->deref_type != nir_deref_type_array)
> - continue;
> -
> -  assert(d->arr.index.is_ssa);
> -  nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);
> -
> -  if (array_index->type != basic_induction)
> - continue;
> -
> -  nir_deref_instr *parent = nir_deref_instr_parent(d);
> -  assert(glsl_type_is_array(parent->type) ||
> - glsl_type_is_matrix(parent->type));
> -  if (glsl_get_length(parent->type) == state->loop->info->max_trip_count)
> +   unsigned array_size = find_array_access_via_induction(state, deref, NULL);
> +   if (array_size) {
> +  if (array_size == state->loop->info->max_trip_count)
>   return true;
>
>if (deref->mode & state->indirect_mask)
> --
> 2.19.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/11] nir: factor out some of the complex loop unroll code to a helper

2018-12-04 Thread Thomas Helland
Den tir. 20. nov. 2018 kl. 09:15 skrev Timothy Arceri :
>
> ---
>  src/compiler/nir/nir_opt_loop_unroll.c | 116 ++---
>  1 file changed, 64 insertions(+), 52 deletions(-)
>
> diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
> b/src/compiler/nir/nir_opt_loop_unroll.c
> index 935429add4..dc440e88b1 100644
> --- a/src/compiler/nir/nir_opt_loop_unroll.c
> +++ b/src/compiler/nir/nir_opt_loop_unroll.c
> @@ -237,6 +237,65 @@ get_complex_unroll_insert_location(nir_cf_node *node, 
> bool continue_from_then)
> }
>  }
>
> +static nir_cf_node *
> +complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
> + nir_cf_list *lp_header, nir_cf_list *lp_body,
> + struct hash_table *remap_table,
> + unsigned num_times_to_clone)
> +{
> +   /* In the terminator that we have no trip count for move everything after
> +* the terminator into the continue from branch.
> +*/
> +   nir_cf_list loop_end;
> +   nir_cf_extract(_end, nir_after_cf_node(_term->nif->cf_node),
> +  nir_after_block(nir_loop_last_block(loop)));
> +   move_cf_list_into_loop_term(_end, unlimit_term);
> +
> +   /* Pluck out the loop body. */
> +   nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
> +  nir_after_block(nir_loop_last_block(loop)));
> +
> +   /* Set unroll_loc to the loop as we will insert the unrolled loop before 
> it
> +*/
> +   nir_cf_node *unroll_loc = >cf_node;
> +
> +   /* Temp list to store the cloned loop as we unroll */
> +   nir_cf_list unrolled_lp_body;
> +
> +   for (unsigned i = 0; i < num_times_to_clone; i++) {
> +
> +  nir_cursor cursor =
> + get_complex_unroll_insert_location(unroll_loc,
> +
> unlimit_term->continue_from_then);
> +
> +  /* Clone loop header and insert in if branch */
> +  nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
> + cursor, remap_table);
> +
> +  cursor =
> + get_complex_unroll_insert_location(unroll_loc,
> +
> unlimit_term->continue_from_then);
> +
> +  /* Clone loop body */
> +  nir_cf_list_clone(_lp_body, lp_body, loop->cf_node.parent,
> +remap_table);
> +
> +  unroll_loc = exec_node_data(nir_cf_node,
> +  exec_list_get_tail(_lp_body.list),
> +  node);
> +  assert(unroll_loc->type == nir_cf_node_block &&
> + 
> exec_list_is_empty(_cf_node_as_block(unroll_loc)->instr_list));
> +
> +  /* Get the unrolled if node */
> +  unroll_loc = nir_cf_node_prev(unroll_loc);
> +
> +  /* Insert unrolled loop body */
> +  nir_cf_reinsert(_lp_body, cursor);
> +   }
> +
> +   return unroll_loc;
> +}
> +
>  /**
>   * Unroll a loop with two exists when the trip count of one of the exits is
>   * unknown.  If continue_from_then is true, the loop is repeated only when 
> the
> @@ -359,61 +418,14 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
> *unlimit_term,
>num_times_to_clone = loop->info->max_trip_count;
> }
>
> -   /* In the terminator that we have no trip count for move everything after
> -* the terminator into the continue from branch.
> -*/
> -   nir_cf_list loop_end;
> -   nir_cf_extract(_end, nir_after_cf_node(_term->nif->cf_node),
> -  nir_after_block(nir_loop_last_block(loop)));
> -   move_cf_list_into_loop_term(_end, unlimit_term);
> -
> -   /* Pluck out the loop body. */
> -   nir_cf_list loop_body;
> -   nir_cf_extract(_body, nir_before_block(nir_loop_first_block(loop)),
> -  nir_after_block(nir_loop_last_block(loop)));
> -
> struct hash_table *remap_table =
>_mesa_hash_table_create(NULL, _mesa_hash_pointer,
>_mesa_key_pointer_equal);
>
> -   /* Set unroll_loc to the loop as we will insert the unrolled loop before 
> it
> -*/
> -   nir_cf_node *unroll_loc = >cf_node;
> -
> -   /* Temp lists to store the cloned loop as we unroll */
> -   nir_cf_list unrolled_lp_body;
> -   nir_cf_list cloned_header;

^ This was actually unused as of the previous patch?
Might want to remove it there instead, but that's just
nitpicking. Either way this patch is:

Reviewed-by: Thomas Helland 

> -
> -   for (unsigned i = 0; i < num_times_to_clone; i++) {
> -
> -  nir_cursor cursor =
> -   

Re: [Mesa-dev] [PATCH 04/11] nir: make use of new nir_cf_list_clone_and_reinsert() helper

2018-12-03 Thread Thomas Helland
Den tir. 20. nov. 2018 kl. 09:15 skrev Timothy Arceri :
>
> ---
>  src/compiler/nir/nir_opt_loop_unroll.c | 74 ++
>  1 file changed, 28 insertions(+), 46 deletions(-)
>
> diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
> b/src/compiler/nir/nir_opt_loop_unroll.c
> index 0e9966320b..935429add4 100644
> --- a/src/compiler/nir/nir_opt_loop_unroll.c
> +++ b/src/compiler/nir/nir_opt_loop_unroll.c
> @@ -169,32 +169,22 @@ simple_unroll(nir_loop *loop)
>_mesa_hash_table_create(NULL, _mesa_hash_pointer,
>_mesa_key_pointer_equal);
>
> -   /* Clone the loop header */
> -   nir_cf_list cloned_header;
> -   nir_cf_list_clone(_header, _header, loop->cf_node.parent,
> - remap_table);
> -
> -   /* Insert cloned loop header before the loop */
> -   nir_cf_reinsert(_header, nir_before_cf_node(>cf_node));
> -
> -   /* Temp list to store the cloned loop body as we unroll */
> -   nir_cf_list unrolled_lp_body;
> +   /* Clone the loop header and insert before the loop */
> +   nir_cf_list_clone_and_reinsert(_header, loop->cf_node.parent,
> +  nir_before_cf_node(>cf_node),
> +  remap_table);
>
> /* Clone loop header and append to the loop body */

^ Leftover comment?

Apart from that, patches 1 to 4 are

Reviewed-by: Thomas Helland 

> for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
> -  /* Clone loop body */
> -  nir_cf_list_clone(_lp_body, _body, loop->cf_node.parent,
> -remap_table);
> -
> -  /* Insert unrolled loop body before the loop */
> -  nir_cf_reinsert(_lp_body, nir_before_cf_node(>cf_node));
> -
> -  /* Clone loop header */
> -  nir_cf_list_clone(_header, _header, loop->cf_node.parent,
> -remap_table);
> -
> -  /* Insert loop header after loop body */
> -  nir_cf_reinsert(_header, nir_before_cf_node(>cf_node));
> +  /* Clone loop body and insert before the loop */
> +  nir_cf_list_clone_and_reinsert(_body, loop->cf_node.parent,
> + nir_before_cf_node(>cf_node),
> + remap_table);
> +
> +  /* Clone loop header and insert after loop body */
> +  nir_cf_list_clone_and_reinsert(_header, loop->cf_node.parent,
> + nir_before_cf_node(>cf_node),
> + remap_table);
> }
>
> /* Remove the break from the loop terminator and add instructions from
> @@ -207,11 +197,9 @@ simple_unroll(nir_loop *loop)
>nir_after_block(limiting_term->break_block));
>
> /* Clone so things get properly remapped */
> -   nir_cf_list cloned_break_list;
> -   nir_cf_list_clone(_break_list, _list, loop->cf_node.parent,
> - remap_table);
> -
> -   nir_cf_reinsert(_break_list, nir_before_cf_node(>cf_node));
> +   nir_cf_list_clone_and_reinsert(_list, loop->cf_node.parent,
> +  nir_before_cf_node(>cf_node),
> +  remap_table);
>
> /* Remove the loop */
> nir_cf_node_remove(>cf_node);
> @@ -397,16 +385,14 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
> *unlimit_term,
> nir_cf_list cloned_header;
>
> for (unsigned i = 0; i < num_times_to_clone; i++) {
> -  /* Clone loop header */
> -  nir_cf_list_clone(_header, _header, loop->cf_node.parent,
> -remap_table);
>
>nir_cursor cursor =
>   get_complex_unroll_insert_location(unroll_loc,
>  
> unlimit_term->continue_from_then);
>
> -  /* Insert cloned loop header */
> -  nir_cf_reinsert(_header, cursor);
> +  /* Clone loop header and insert in if branch */
> +  nir_cf_list_clone_and_reinsert(_header, loop->cf_node.parent,
> + cursor, remap_table);
>
>cursor =
>   get_complex_unroll_insert_location(unroll_loc,
> @@ -432,28 +418,24 @@ complex_unroll(nir_loop *loop, nir_loop_terminator 
> *unlimit_term,
> if (!limiting_term_second) {
>assert(unroll_loc->type == nir_cf_node_if);
>
> -  nir_cf_list_clone(_header, _header, loop->cf_node.parent,
> -remap_table);
> -
>nir_cursor cursor =
>   get_complex_unroll_insert_location(unroll_loc,
>  
> unlimit_term->continue_from_then);
>
> -  /* Insert 

Re: [Mesa-dev] [PATCH 4/4] nir: detect more induction variables

2018-11-30 Thread Thomas Helland
I've done a couple passes over the patches now.
Neatly implemented and look correct to me.
With the two small nitpicks below corrected, this whole series is:

Reviewed-by: Thomas Helland 

Den ons. 28. nov. 2018 kl. 04:26 skrev Timothy Arceri :
>
> This adds allows loop analysis to detect inductions varibales that

This reads weird ("adds allows"). And s/varibales/variables/

> are incremented in both branches of an if rather than in a main
> loop block. For example:
>
>loop {
>   block block_1:
>   /* preds: block_0 block_7 */
>   vec1 32 ssa_8 = phi block_0: ssa_4, block_7: ssa_20
>   vec1 32 ssa_9 = phi block_0: ssa_0, block_7: ssa_4
>   vec1 32 ssa_10 = phi block_0: ssa_1, block_7: ssa_4
>   vec1 32 ssa_11 = phi block_0: ssa_2, block_7: ssa_21
>   vec1 32 ssa_12 = phi block_0: ssa_3, block_7: ssa_22
>   vec4 32 ssa_13 = vec4 ssa_12, ssa_11, ssa_10, ssa_9
>   vec1 32 ssa_14 = ige ssa_8, ssa_5
>   /* succs: block_2 block_3 */
>   if ssa_14 {
>  block block_2:
>  /* preds: block_1 */
>  break
>  /* succs: block_8 */
>   } else {
>  block block_3:
>  /* preds: block_1 */
>  /* succs: block_4 */
>   }
>   block block_4:
>   /* preds: block_3 */
>   vec1 32 ssa_15 = ilt ssa_6, ssa_8
>   /* succs: block_5 block_6 */
>   if ssa_15 {
>  block block_5:
>  /* preds: block_4 */
>  vec1 32 ssa_16 = iadd ssa_8, ssa_7
>  vec1 32 ssa_17 = load_const (0x3f80 /* 1.00*/)
>  /* succs: block_7 */
>   } else {
>  block block_6:
>  /* preds: block_4 */
>  vec1 32 ssa_18 = iadd ssa_8, ssa_7
>  vec1 32 ssa_19 = load_const (0x3f80 /* 1.00*/)
>  /* succs: block_7 */
>   }
>   block block_7:
>   /* preds: block_5 block_6 */
>   vec1 32 ssa_20 = phi block_5: ssa_16, block_6: ssa_18
>   vec1 32 ssa_21 = phi block_5: ssa_17, block_6: ssa_4
>   vec1 32 ssa_22 = phi block_5: ssa_4, block_6: ssa_19
>   /* succs: block_1 */
>}
>
> Unfortunatly GCM could move the addition out of the if for us
> (making this patch unrequired) but we still cannot enable the GCM
> pass without regressions.
>
> This unrolls a loop in Rise of The Tomb Raider.
>
> vkpipeline-db results (VEGA):
>
> Totals from affected shaders:
> SGPRS: 88 -> 96 (9.09 %)
> VGPRS: 56 -> 52 (-7.14 %)
> Spilled SGPRs: 0 -> 0 (0.00 %)
> Spilled VGPRs: 0 -> 0 (0.00 %)
> Private memory VGPRs: 0 -> 0 (0.00 %)
> Scratch size: 0 -> 0 (0.00 %) dwords per thread
> Code Size: 2168 -> 4560 (110.33 %) bytes
> LDS: 0 -> 0 (0.00 %) blocks
> Max Waves: 4 -> 4 (0.00 %)
> Wait states: 0 -> 0 (0.00 %)
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32211
> ---
>  src/compiler/nir/nir_loop_analyze.c | 36 +
>  1 file changed, 36 insertions(+)
>
> diff --git a/src/compiler/nir/nir_loop_analyze.c 
> b/src/compiler/nir/nir_loop_analyze.c
> index 8903e15105..cf97d6bf06 100644
> --- a/src/compiler/nir/nir_loop_analyze.c
> +++ b/src/compiler/nir/nir_loop_analyze.c
> @@ -245,6 +245,42 @@ compute_induction_information(loop_info_state *state)
>   if (src_var->in_if_branch || src_var->in_nested_loop)
>  break;
>
> + /* Detect inductions varibales that are incremented in both branches

s/varibales/variables

> +  * of an unnested if rather than in a loop block.
> +  */
> + if (is_var_phi(src_var)) {
> +nir_phi_instr *src_phi =
> +   nir_instr_as_phi(src_var->def->parent_instr);
> +
> +nir_op alu_op;
> +nir_ssa_def *alu_srcs[2] = {0};
> +nir_foreach_phi_src(src2, src_phi) {
> +   nir_loop_variable *src_var2 =
> +  get_loop_var(src2->src.ssa, state);
> +
> +   if (!src_var2->in_if_branch || !is_var_alu(src_var2))
> +  break;
> +
> +   nir_alu_instr *alu =
> +  nir_instr_as_alu(src_var2->def->parent_instr);
> +   if (nir_op_infos[alu->op].num_inputs != 2)
> +  break;
> +
> +   if (alu->src[0].src.ssa == alu_srcs[0] &&
> +   alu->src[1].src.ssa == alu_srcs[1] &&
> +   alu->op == alu_op) {
> +  /* Both branches perform the same calculation so we can use
> +   * one of them to find the induction variable.
> +   */
> +  src_var = src_var2;
> +   } else {
> +   

Re: [Mesa-dev] [PATCH 4/4] nir: detect more induction variables

2018-11-29 Thread Thomas Helland
Den ons. 28. nov. 2018 kl. 10:23 skrev Timothy Arceri :
>
> On 28/11/18 6:52 pm, Thomas Helland wrote:
> > Den ons. 28. nov. 2018 kl. 04:26 skrev Timothy Arceri 
> > :
> >>
> >> This adds allows loop analysis to detect inductions varibales that
> >> are incremented in both branches of an if rather than in a main
> >> loop block. For example:
> >>
> >> loop {
> >>block block_1:
> >>/* preds: block_0 block_7 */
> >>vec1 32 ssa_8 = phi block_0: ssa_4, block_7: ssa_20
> >>vec1 32 ssa_9 = phi block_0: ssa_0, block_7: ssa_4
> >>vec1 32 ssa_10 = phi block_0: ssa_1, block_7: ssa_4
> >>vec1 32 ssa_11 = phi block_0: ssa_2, block_7: ssa_21
> >>vec1 32 ssa_12 = phi block_0: ssa_3, block_7: ssa_22
> >>vec4 32 ssa_13 = vec4 ssa_12, ssa_11, ssa_10, ssa_9
> >>vec1 32 ssa_14 = ige ssa_8, ssa_5
> >>/* succs: block_2 block_3 */
> >>if ssa_14 {
> >>   block block_2:
> >>   /* preds: block_1 */
> >>   break
> >>   /* succs: block_8 */
> >>} else {
> >>   block block_3:
> >>   /* preds: block_1 */
> >>   /* succs: block_4 */
> >>}
> >>block block_4:
> >>/* preds: block_3 */
> >>vec1 32 ssa_15 = ilt ssa_6, ssa_8
> >>/* succs: block_5 block_6 */
> >>if ssa_15 {
> >>   block block_5:
> >>   /* preds: block_4 */
> >>   vec1 32 ssa_16 = iadd ssa_8, ssa_7
> >>   vec1 32 ssa_17 = load_const (0x3f80 /* 1.00*/)
> >>   /* succs: block_7 */
> >>} else {
> >>   block block_6:
> >>   /* preds: block_4 */
> >>   vec1 32 ssa_18 = iadd ssa_8, ssa_7
> >>   vec1 32 ssa_19 = load_const (0x3f80 /* 1.00*/)
> >>   /* succs: block_7 */
> >>}
> >>block block_7:
> >>/* preds: block_5 block_6 */
> >>vec1 32 ssa_20 = phi block_5: ssa_16, block_6: ssa_18
> >>vec1 32 ssa_21 = phi block_5: ssa_17, block_6: ssa_4
> >>vec1 32 ssa_22 = phi block_5: ssa_4, block_6: ssa_19
> >>/* succs: block_1 */
> >> }
> >>
> >> Unfortunatly GCM could move the addition out of the if for us
> >> (making this patch unrequired) but we still cannot enable the GCM
> >> pass without regressions.
> >>
> >
> > Just some questions / suggestions from my side for now.
> > I'll try to take a closer look at the patch later today.
> >
> > While GCM would be nice, to me it seems that adding an
> > if-opt instead, that pulls common code from both branches
> > of an if out of the if on a more general basis, would get us
> > this, plus a bunch of other benefits? As far as I can see there
> > should never be negative impacts from pulling common code
> > out like that, but I might be wrong. Did you look into that?
> > I bet out did, I'm just interested in how that worked out.
>
> I didn't attempt this because pulling code out of the ifs can increase
> register pressure. This is one of the problems we have with the GCM pass
> currently, so for now I chose a more conservative approach.
>

Yeah, of course. I'm being dumb. It looks better in source code,
but as long as it does not lead to other optimizations it will only
cause the live range of the add to intersect with that of the branch
condition. The same amount of instructions will be executed
either way.

> >
> > Since GCM is not yet where we want it to be, maybe we'd
> > want to implement LICM? That obviously does not come
> > into play with what this patch adresses, but it might help
> > get a more accurate estimate of the cost/benefit of unrolling?
> > (Invariant computations that will be CSE'd will not be
> > counted multiple times). This might already be accounted
> > for by counting the invariant computations only once?
>
> No we don't do anything like this currently. The GCM pass can pull
> things out of loops also, but again we hit register pressure issues with
> that pass.
>
> As far as I'm aware reducing invariants is not where we get most of our
> wins from with unrolling. Removing indirect array access, improving
> opportunities for constant folding (and a bunch of other passes), being
> able to evaluate the unfolded loop with the surrounding code etc all
> result in greater benefits.
>
> With the limits we place on making sure we don't unroll large loops that
> are going to cause register use issues, nobody has yet been able to show
> that always unrolling loops is causing any harm, and it's certainly been
> shown to help :)

Thanks for taking the time with my stupidity =) I'll try to take a look at
these patches later tonight =)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] nir: detect more induction variables

2018-11-27 Thread Thomas Helland
Den ons. 28. nov. 2018 kl. 04:26 skrev Timothy Arceri :
>
> This adds allows loop analysis to detect inductions varibales that
> are incremented in both branches of an if rather than in a main
> loop block. For example:
>
>loop {
>   block block_1:
>   /* preds: block_0 block_7 */
>   vec1 32 ssa_8 = phi block_0: ssa_4, block_7: ssa_20
>   vec1 32 ssa_9 = phi block_0: ssa_0, block_7: ssa_4
>   vec1 32 ssa_10 = phi block_0: ssa_1, block_7: ssa_4
>   vec1 32 ssa_11 = phi block_0: ssa_2, block_7: ssa_21
>   vec1 32 ssa_12 = phi block_0: ssa_3, block_7: ssa_22
>   vec4 32 ssa_13 = vec4 ssa_12, ssa_11, ssa_10, ssa_9
>   vec1 32 ssa_14 = ige ssa_8, ssa_5
>   /* succs: block_2 block_3 */
>   if ssa_14 {
>  block block_2:
>  /* preds: block_1 */
>  break
>  /* succs: block_8 */
>   } else {
>  block block_3:
>  /* preds: block_1 */
>  /* succs: block_4 */
>   }
>   block block_4:
>   /* preds: block_3 */
>   vec1 32 ssa_15 = ilt ssa_6, ssa_8
>   /* succs: block_5 block_6 */
>   if ssa_15 {
>  block block_5:
>  /* preds: block_4 */
>  vec1 32 ssa_16 = iadd ssa_8, ssa_7
>  vec1 32 ssa_17 = load_const (0x3f80 /* 1.00*/)
>  /* succs: block_7 */
>   } else {
>  block block_6:
>  /* preds: block_4 */
>  vec1 32 ssa_18 = iadd ssa_8, ssa_7
>  vec1 32 ssa_19 = load_const (0x3f80 /* 1.00*/)
>  /* succs: block_7 */
>   }
>   block block_7:
>   /* preds: block_5 block_6 */
>   vec1 32 ssa_20 = phi block_5: ssa_16, block_6: ssa_18
>   vec1 32 ssa_21 = phi block_5: ssa_17, block_6: ssa_4
>   vec1 32 ssa_22 = phi block_5: ssa_4, block_6: ssa_19
>   /* succs: block_1 */
>}
>
> Unfortunatly GCM could move the addition out of the if for us
> (making this patch unrequired) but we still cannot enable the GCM
> pass without regressions.
>

Just some questions / suggestions from my side for now.
I'll try to take a closer look at the patch later today.

While GCM would be nice, to me it seems that adding an
if-opt instead, that pulls common code from both branches
of an if out of the if on a more general basis, would get us
this, plus a bunch of other benefits? As far as I can see there
should never be negative impacts from pulling common code
out like that, but I might be wrong. Did you look into that?
I bet you did, I'm just interested in how that worked out.

Since GCM is not yet where we want it to be, maybe we'd
want to implement LICM? That obviously does not come
into play with what this patch addresses, but it might help
get a more accurate estimate of the cost/benefit of unrolling?
(Invariant computations that will be CSE'd will not be
counted multiple times). This might already be accounted
for by counting the invariant computations only once?

> This unrolls a loop in Rise of The Tomb Raider.
>
> vkpipeline-db results (VEGA):
>
> Totals from affected shaders:
> SGPRS: 88 -> 96 (9.09 %)
> VGPRS: 56 -> 52 (-7.14 %)
> Spilled SGPRs: 0 -> 0 (0.00 %)
> Spilled VGPRs: 0 -> 0 (0.00 %)
> Private memory VGPRs: 0 -> 0 (0.00 %)
> Scratch size: 0 -> 0 (0.00 %) dwords per thread
> Code Size: 2168 -> 4560 (110.33 %) bytes
> LDS: 0 -> 0 (0.00 %) blocks
> Max Waves: 4 -> 4 (0.00 %)
> Wait states: 0 -> 0 (0.00 %)
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32211
> ---
>  src/compiler/nir/nir_loop_analyze.c | 36 +
>  1 file changed, 36 insertions(+)
>
> diff --git a/src/compiler/nir/nir_loop_analyze.c 
> b/src/compiler/nir/nir_loop_analyze.c
> index 8903e15105..cf97d6bf06 100644
> --- a/src/compiler/nir/nir_loop_analyze.c
> +++ b/src/compiler/nir/nir_loop_analyze.c
> @@ -245,6 +245,42 @@ compute_induction_information(loop_info_state *state)
>   if (src_var->in_if_branch || src_var->in_nested_loop)
>  break;
>
> + /* Detect inductions varibales that are incremented in both branches
> +  * of an unnested if rather than in a loop block.
> +  */
> + if (is_var_phi(src_var)) {
> +nir_phi_instr *src_phi =
> +   nir_instr_as_phi(src_var->def->parent_instr);
> +
> +nir_op alu_op;
> +nir_ssa_def *alu_srcs[2] = {0};
> +nir_foreach_phi_src(src2, src_phi) {
> +   nir_loop_variable *src_var2 =
> +  get_loop_var(src2->src.ssa, state);
> +
> +   if (!src_var2->in_if_branch || !is_var_alu(src_var2))
> +  break;
> +
> +   nir_alu_instr *alu =
> +  nir_instr_as_alu(src_var2->def->parent_instr);
> +   if (nir_op_infos[alu->op].num_inputs != 2)
> +  break;
> +
> +   if (alu->src[0].src.ssa == alu_srcs[0] &&
> +   alu->src[1].src.ssa == alu_srcs[1] &&
> +   alu->op == alu_op) {
> +  /* 

Re: [Mesa-dev] [PATCH 1/2] nir: add rewrite_phi_predecessor_blocks() helper

2018-11-26 Thread Thomas Helland
This patch is:

Reviewed-by: Thomas Helland 

Den tir. 27. nov. 2018 kl. 06:32 skrev Timothy Arceri :
>
> This will also be used by the if merge pass in the following commit.
> ---
>  src/compiler/nir/nir_opt_if.c | 45 ++-
>  1 file changed, 28 insertions(+), 17 deletions(-)
>
> diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
> index 8a971c43f2..62566eb403 100644
> --- a/src/compiler/nir/nir_opt_if.c
> +++ b/src/compiler/nir/nir_opt_if.c
> @@ -237,6 +237,32 @@ is_block_empty(nir_block *block)
>exec_list_is_empty(>instr_list);
>  }
>
> +static void
> +rewrite_phi_predecessor_blocks(nir_if *nif,
> +   nir_block *old_then_block,
> +   nir_block *old_else_block,
> +   nir_block *new_then_block,
> +   nir_block *new_else_block)
> +{
> +   nir_block *after_if_block =
> +  nir_cf_node_as_block(nir_cf_node_next(>cf_node));
> +
> +   nir_foreach_instr(instr, after_if_block) {
> +  if (instr->type != nir_instr_type_phi)
> + continue;
> +
> +  nir_phi_instr *phi = nir_instr_as_phi(instr);
> +
> +  foreach_list_typed(nir_phi_src, src, node, >srcs) {
> + if (src->pred == old_then_block) {
> +src->pred = new_then_block;
> + } else if (src->pred == old_else_block) {
> +src->pred = new_else_block;
> + }
> +  }
> +   }
> +}
> +
>  /**
>   * This optimization turns:
>   *
> @@ -284,23 +310,8 @@ opt_if_simplification(nir_builder *b, nir_if *nif)
> /* Walk all the phis in the block immediately following the if statement 
> and
>  * swap the blocks.
>  */
> -   nir_block *after_if_block =
> -  nir_cf_node_as_block(nir_cf_node_next(>cf_node));
> -
> -   nir_foreach_instr(instr, after_if_block) {
> -  if (instr->type != nir_instr_type_phi)
> - continue;
> -
> -  nir_phi_instr *phi = nir_instr_as_phi(instr);
> -
> -  foreach_list_typed(nir_phi_src, src, node, >srcs) {
> - if (src->pred == else_block) {
> -src->pred = then_block;
> - } else if (src->pred == then_block) {
> -src->pred = else_block;
> - }
> -  }
> -   }
> +   rewrite_phi_predecessor_blocks(nif, then_block, else_block, else_block,
> +  then_block);
>
> /* Finally, move the else block to the then block. */
> nir_cf_list tmp;
> --
> 2.19.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir/derefs: Add a nir_derefs_do_not_alias enum value

2018-11-20 Thread Thomas Helland
It seems this patch is not based on current master?
However, it is trivial enough and I like the change,
so with a rebase on current master this patch is:

Reviewed-by: Thomas Helland 

Den man. 19. nov. 2018 kl. 19:33 skrev Jason Ekstrand :
>
> This makes some of the code more clear.
> ---
>  src/compiler/nir/nir_deref.c | 8 
>  src/compiler/nir/nir_deref.h | 1 +
>  2 files changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c
> index 59b36a7f255..68a3aad5c70 100644
> --- a/src/compiler/nir/nir_deref.c
> +++ b/src/compiler/nir/nir_deref.c
> @@ -289,7 +289,7 @@ nir_compare_deref_paths(nir_deref_path *a_path,
>  nir_deref_path *b_path)
>  {
> if (!modes_may_alias(b_path->path[0]->mode, a_path->path[0]->mode))
> -  return 0;
> +  return nir_derefs_do_not_alias;
>
> if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
>return nir_derefs_may_alias_bit;
> @@ -299,7 +299,7 @@ nir_compare_deref_paths(nir_deref_path *a_path,
> * they're not the same variable, we know they can't possibly alias.
> */
>if (a_path->path[0]->var != b_path->path[0]->var)
> - return 0;
> + return nir_derefs_do_not_alias;
> } else {
>assert(a_path->path[0]->deref_type == nir_deref_type_cast);
>/* If they're not exactly the same cast, we can't compare them so we
> @@ -357,7 +357,7 @@ nir_compare_deref_paths(nir_deref_path *a_path,
>  */
> if (nir_src_as_uint(a_tail->arr.index) !=
> nir_src_as_uint(b_tail->arr.index))
> -  return 0;
> +  return nir_derefs_do_not_alias;
>  } else if (a_tail->arr.index.ssa == b_tail->arr.index.ssa) {
> /* They're the same indirect, continue on */
>  } else {
> @@ -373,7 +373,7 @@ nir_compare_deref_paths(nir_deref_path *a_path,
>case nir_deref_type_struct: {
>   /* If they're different struct members, they don't even alias */
>   if (a_tail->strct.index != b_tail->strct.index)
> -return 0;
> +return nir_derefs_do_not_alias;
>   break;
>}
>
> diff --git a/src/compiler/nir/nir_deref.h b/src/compiler/nir/nir_deref.h
> index c61c3f9366f..20d40377e6e 100644
> --- a/src/compiler/nir/nir_deref.h
> +++ b/src/compiler/nir/nir_deref.h
> @@ -55,6 +55,7 @@ nir_ssa_def *nir_build_deref_offset(nir_builder *b, 
> nir_deref_instr *deref,
>  glsl_type_size_align_func size_align);
>
>  typedef enum {
> +   nir_derefs_do_not_alias = 0,
> nir_derefs_equal_bit= (1 << 0),
> nir_derefs_may_alias_bit= (1 << 1),
> nir_derefs_a_contains_b_bit = (1 << 2),
> --
> 2.19.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/9] nir: Add nir_const_value_negative_equal

2018-10-08 Thread Thomas Helland
I really like this one; it's very readable =)

Reviewed-by: Thomas Helland

Den tor. 30. aug. 2018 kl. 07:37 skrev Ian Romanick :
>
> From: Ian Romanick 
>
> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/nir/meson.build|  12 +
>  src/compiler/nir/nir.h  |   6 +
>  src/compiler/nir/nir_instr_set.c|  98 +
>  src/compiler/nir/tests/negative_equal_tests.cpp | 278 
> 
>  4 files changed, 394 insertions(+)
>  create mode 100644 src/compiler/nir/tests/negative_equal_tests.cpp
>
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index 090aa7a628f..5438c17a8f8 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -245,4 +245,16 @@ if with_tests
>link_with : libmesa_util,
>  )
>)
> +
> +  test(
> +'negative_equal',
> +executable(
> +  'negative_equal',
> +  files('tests/negative_equal_tests.cpp'),
> +  c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
> +  include_directories : [inc_common],
> +  dependencies : [dep_thread, idep_gtest, idep_nir],
> +  link_with : libmesa_util,
> +)
> +  )
>  endif
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 9bca6d487e9..f94538e0782 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -955,6 +955,12 @@ nir_ssa_alu_instr_src_components(const nir_alu_instr 
> *instr, unsigned src)
> return instr->dest.dest.ssa.num_components;
>  }
>
> +bool nir_const_value_negative_equal(const nir_const_value *c1,
> +const nir_const_value *c2,
> +unsigned components,
> +nir_alu_type base_type,
> +unsigned bits);
> +
>  bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
>  unsigned src1, unsigned src2);
>
> diff --git a/src/compiler/nir/nir_instr_set.c 
> b/src/compiler/nir/nir_instr_set.c
> index 19771fcd9dd..009d9661e60 100644
> --- a/src/compiler/nir/nir_instr_set.c
> +++ b/src/compiler/nir/nir_instr_set.c
> @@ -23,6 +23,7 @@
>
>  #include "nir_instr_set.h"
>  #include "nir_vla.h"
> +#include "util/half_float.h"
>
>  #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
>
> @@ -261,6 +262,103 @@ nir_srcs_equal(nir_src src1, nir_src src2)
> }
>  }
>
> +bool
> +nir_const_value_negative_equal(const nir_const_value *c1,
> +   const nir_const_value *c2,
> +   unsigned components,
> +   nir_alu_type base_type,
> +   unsigned bits)
> +{
> +   assert(base_type == nir_alu_type_get_base_type(base_type));
> +   assert(base_type != nir_type_invalid);
> +
> +   switch (base_type) {
> +   case nir_type_float:
> +  switch (bits) {
> +  case 16:
> + for (unsigned i = 0; i < components; i++) {
> +if (_mesa_half_to_float(c1->u16[i]) !=
> +-_mesa_half_to_float(c2->u16[i])) {
> +   return false;
> +}
> + }
> +
> + return true;
> +
> +  case 32:
> + for (unsigned i = 0; i < components; i++) {
> +if (c1->f32[i] != -c2->f32[i])
> +   return false;
> + }
> +
> + return true;
> +
> +  case 64:
> + for (unsigned i = 0; i < components; i++) {
> +if (c1->f64[i] != -c2->f64[i])
> +   return false;
> + }
> +
> + return true;
> +
> +  default:
> + unreachable("unknown bit size");
> +  }
> +
> +  break;
> +
> +   case nir_type_int:
> +   case nir_type_uint:
> +  switch (bits) {
> +  case 8:
> + for (unsigned i = 0; i < components; i++) {
> +if (c1->i8[i] != -c2->i8[i])
> +   return false;
> + }
> +
> + return true;
> +
> +  case 16:
> + for (unsigned i = 0; i < components; i++) {
> +if (c1->i16[i] != -c2->i16[i])
> +   return false;
> + }
> +
> + return true;
> + break;
> +
> +  case 32:
> + for (unsigned i = 0; i < components; i++) {
> +if (c1->i32[i] != -c2->i32[i])
> +   return false;
> + }
> +
> + return true;
> +
> +  case 64:
> + for (

Re: [Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions

2018-10-08 Thread Thomas Helland
fault:
>  if (!alu_ok) {
> /* It must be a move-like operation. */
> @@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, unsigned 
> *count,
>
>  static bool
>  nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
> -  unsigned limit, bool indirect_load_ok)
> +  unsigned limit, bool indirect_load_ok,
> +  bool expensive_alu_ok)
>  {
> if (nir_cf_node_is_first(>cf_node))
>return false;
> @@ -180,10 +201,17 @@ nir_opt_peephole_select_block(nir_block *block, 
> nir_shader *shader,
>
> /* ... and those blocks must only contain "allowed" instructions. */
> unsigned count = 0;
> +#if 1
> if (!block_check_for_allowed_instrs(then_block, , limit != 0,
> -   indirect_load_ok) ||
> +   indirect_load_ok, expensive_alu_ok) ||
> !block_check_for_allowed_instrs(else_block, , limit != 0,
> -   indirect_load_ok))
> +   indirect_load_ok, expensive_alu_ok))
> +#else
> +   if (!block_check_for_allowed_instrs(then_block, , 
> shader->info.stage,
> +   limit != 0, indirect_load_ok, 
> expensive_alu_ok) ||
> +   !block_check_for_allowed_instrs(else_block, , 
> shader->info.stage,
> +   limit != 0, indirect_load_ok, 
> expensive_alu_ok))
> +#endif

Leftover testing stuff?

I like the idea of hiding expensive instructions in a branch.
However, I'm wondering if it might be an idea to let drivers
provide a callback with what instructions they want to allow?
If no other drivers plan on doing something like this then I
guess this is OK. I would expect the "expensive" instructions
to be mostly the same subset on most architectures too.
Might want to get some input from others, but with the debug
stuff above removed this is:

Reviewed-by: Thomas Helland

>return false;
>
> if (count > limit)
> @@ -250,14 +278,15 @@ nir_opt_peephole_select_block(nir_block *block, 
> nir_shader *shader,
>
>  static bool
>  nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
> - bool indirect_load_ok)
> + bool indirect_load_ok, bool expensive_alu_ok)
>  {
> nir_shader *shader = impl->function->shader;
> bool progress = false;
>
> nir_foreach_block_safe(block, impl) {
>progress |= nir_opt_peephole_select_block(block, shader, limit,
> -indirect_load_ok);
> +indirect_load_ok,
> +expensive_alu_ok);
> }
>
> if (progress)
> @@ -268,14 +297,15 @@ nir_opt_peephole_select_impl(nir_function_impl *impl, 
> unsigned limit,
>
>  bool
>  nir_opt_peephole_select(nir_shader *shader, unsigned limit,
> -bool indirect_load_ok)
> +bool indirect_load_ok, bool expensive_alu_ok)
>  {
> bool progress = false;
>
> nir_foreach_function(function, shader) {
>if (function->impl)
>   progress |= nir_opt_peephole_select_impl(function->impl, limit,
> -  indirect_load_ok);
> +  indirect_load_ok,
> +  expensive_alu_ok);
> }
>
> return progress;
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c 
> b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> index 5f66ef5d170..bb3bb73644a 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> @@ -107,7 +107,7 @@ ir3_optimize_loop(nir_shader *s)
> progress |= OPT(s, nir_opt_gcm, true);
> else if (gcm == 2)
> progress |= OPT(s, nir_opt_gcm, false);
> -   progress |= OPT(s, nir_opt_peephole_select, 16, true);
> +   progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
> progress |= OPT(s, nir_opt_intrinsics);
> progress |= OPT(s, nir_opt_algebraic);
> progress |= OPT(s, nir_opt_constant_folding);
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
> b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 9a7a8264283..cb1e208be8f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -813,7 +813,7 @@ si_low

Re: [Mesa-dev] [PATCH 00/11] intel/compiler: Optimize sign(x)*y

2018-10-08 Thread Thomas Helland
Den tir. 11. sep. 2018 kl. 01:30 skrev Ian Romanick :
>
> This series implements a code-generation optimization for sign(x)*y.  In
> GLSL, sign(x) is defined as:
>
> Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
>
> It is silent on the NaN behavior, so I have taken it as "undefined."  I
> don't think the new implementation will produce different results from
> the old.
>
> The optimization is only applied to the scalar backend.  On Skylake,
> there are ~1,000 shaders in VS, TCS, and TES stages helped.  It may be
> worth applying this to the vector backend for Haswell.  I have a couple
> long flights in my near future, so I might work on it then.  We'll see.
> This might also be a good newbie projet for someone wanting to get into
> the i965 compiler backend.
>
> There are actually two versions of this series.  The series that I am
> sending to the list includes "i965/fs: Eliminate dead code first".  The
> results of that patch is not good.  The other version of the series
> omits that patch, but it adds a bunch of horror to "i965/fs: Add a scale
> factor to emit_fsign".  Basically, if both the fused and non-fused
> version of the nir_op_fsign are emitted, copy propagation will propagate
> part of the common expressions, but, due to the predicated OR or XOR,
> one extra MOV will be left around.  That single instruction ruins the
> whole optimization.
>
> Both versions are available in my cgit.  List version:
>
> https://cgit.freedesktop.org/~idr/mesa/log/?h=fsign-optimization
>
> That branch includes a few things that I tried, but they did not pan
> out.
>
> Alternate version:
>
> 
> https://cgit.freedesktop.org/~idr/mesa/log/?h=fsign-optimization-emit-no-dead-code
>
> I think the version sent to the list is cleaner, but it's shader-db
> results are not as good.  The difference between the list version and
> the other version on Skylake is shown below.  Other platforms had
> similar shaped results.
>
> total instructions in shared programs: 15090997 -> 15091028 (<.01%)
> instructions in affected programs: 10251 -> 10282 (0.30%)
> helped: 0
> HURT: 26
> HURT stats (abs)   min: 1 max: 4 x̄: 1.19 x̃: 1
> HURT stats (rel)   min: 0.14% max: 1.96% x̄: 0.49% x̃: 0.24%
> 95% mean confidence interval for instructions value: 0.94 1.45
> 95% mean confidence interval for instructions %-change: 0.28% 0.71%
> Instructions are HURT.
>
> total cycles in shared programs: 565827580 -> 565824007 (<.01%)
> cycles in affected programs: 1995745 -> 1992172 (-0.18%)
> helped: 271
> HURT: 248
> helped stats (abs) min: 1 max: 623 x̄: 25.79 x̃: 5
> helped stats (rel) min: 0.02% max: 13.19% x̄: 0.94% x̃: 0.28%
> HURT stats (abs)   min: 1 max: 204 x̄: 13.78 x̃: 4
> HURT stats (rel)   min: 0.01% max: 6.57% x̄: 0.52% x̃: 0.21%
> 95% mean confidence interval for cycles value: -11.25 -2.52
> 95% mean confidence interval for cycles %-change: -0.38% -0.11%
> Cycles are helped.
>
> The version sent to the list saves a couple instructions in 26 shaders,
> but cycles are hurt.  The list version also avoids ~65 lines of ugly
> code.
>
> I also sent a couple tests to the piglit list that exersice a bug that I
> had during development.
>
> https://patchwork.freedesktop.org/patch/247911/
>
> In the alternate version, for an expression like sign(a)*sign(b),
> sign(b) would never get emitted.  When the fused sign(a)*x was emitted,
> it would explode.  The solution was to just bail on the optimization
> when sign(a)*sign(b) is encountered.  I suspect that's the source of the
> 32 shaders with instructions hurt in the alternate version, but I have
> not verified that.
>

I've reviewed some of the NIR patches in this series.
I'll leave the intel backend ones to people with more experience there.
Some of the more complicated NIR patches were left for now;
I might look at them later this week if time allows.

- Thomas

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/11] nir: Add helper functions to get the instruction that generated a nir_src

2018-10-08 Thread Thomas Helland
Den tir. 11. sep. 2018 kl. 01:30 skrev Ian Romanick :
>
> From: Ian Romanick 
>

Reviewed-by: Thomas Helland

> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/nir/nir.h | 23 +++
>  1 file changed, 23 insertions(+)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index bf4bd916d27..69ca1215644 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2490,6 +2490,29 @@ bool nir_foreach_dest(nir_instr *instr, 
> nir_foreach_dest_cb cb, void *state);
>  bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
>
>  nir_const_value *nir_src_as_const_value(nir_src src);
> +
> +static inline struct nir_instr *
> +nir_src_instr(const struct nir_src *src)
> +{
> +   return src->is_ssa ? src->ssa->parent_instr : NULL;
> +}
> +
> +#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro)\
> +static inline c_type *  \
> +nir_src_as_ ## name (struct nir_src *src)   \
> +{   \
> +return src->is_ssa && src->ssa->parent_instr->type == type_enum \
> +   ? cast_macro(src->ssa->parent_instr) : NULL; \
> +}   \
> +static inline const c_type *\
> +nir_src_as_ ## name ## _const(const struct nir_src *src)\
> +{   \
> +return src->is_ssa && src->ssa->parent_instr->type == type_enum \
> +   ? cast_macro(src->ssa->parent_instr) : NULL; \
> +}
> +
> +NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
> +
>  bool nir_src_is_dynamically_uniform(nir_src src);
>  bool nir_srcs_equal(nir_src src1, nir_src src2);
>  void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/11] intel/compiler: Don't handle fsign.sat

2018-10-08 Thread Thomas Helland
Den tir. 11. sep. 2018 kl. 01:30 skrev Ian Romanick :
>
> From: Ian Romanick 
>
> No shader-db or CI changes on any Intel platform.
>

I'm no expert on the intel backend, but this seems trivial enough.

Reviewed-by: Thomas Helland

> Signed-off-by: Ian Romanick 
> ---
>  src/intel/compiler/brw_fs_nir.cpp   | 14 +-
>  src/intel/compiler/brw_vec4_nir.cpp | 12 ++--
>  2 files changed, 3 insertions(+), 23 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index 7f453d75b64..12b087a5ec0 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -842,6 +842,7 @@ fs_visitor::nir_emit_alu(const fs_builder , 
> nir_alu_instr *instr)
>break;
>
> case nir_op_fsign: {
> +  assert(!instr->dest.saturate);
>if (op[0].abs) {
>   /* Straightforward since the source can be assumed to be either
>* strictly >= 0 or strictly <= 0 depending on the setting of the
> @@ -854,10 +855,6 @@ fs_visitor::nir_emit_alu(const fs_builder , 
> nir_alu_instr *instr)
>  : bld.MOV(result, brw_imm_f(1.0f));
>
>   set_predicate(BRW_PREDICATE_NORMAL, inst);
> -
> - if (instr->dest.saturate)
> -inst->saturate = true;
> -
>} else if (type_sz(op[0].type) < 8) {
>   /* AND(val, 0x8000) gives the sign bit.
>*
> @@ -873,10 +870,6 @@ fs_visitor::nir_emit_alu(const fs_builder , 
> nir_alu_instr *instr)
>
>   inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f80u));
>   inst->predicate = BRW_PREDICATE_NORMAL;
> - if (instr->dest.saturate) {
> -inst = bld.MOV(result, result);
> -inst->saturate = true;
> - }
>} else {
>   /* For doubles we do the same but we need to consider:
>*
> @@ -897,11 +890,6 @@ fs_visitor::nir_emit_alu(const fs_builder , 
> nir_alu_instr *instr)
>
>   set_predicate(BRW_PREDICATE_NORMAL,
> bld.OR(r, r, brw_imm_ud(0x3ff0u)));
> -
> - if (instr->dest.saturate) {
> -inst = bld.MOV(result, result);
> -inst->saturate = true;
> - }
>}
>break;
> }
> diff --git a/src/intel/compiler/brw_vec4_nir.cpp 
> b/src/intel/compiler/brw_vec4_nir.cpp
> index 124714b59de..eaf1754b006 100644
> --- a/src/intel/compiler/brw_vec4_nir.cpp
> +++ b/src/intel/compiler/brw_vec4_nir.cpp
> @@ -1818,6 +1818,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>unreachable("not reached: should have been lowered");
>
> case nir_op_fsign:
> +  assert(!instr->dest.saturate);
>if (op[0].abs) {
>   /* Straightforward since the source can be assumed to be either
>* strictly >= 0 or strictly <= 0 depending on the setting of the
> @@ -1830,10 +1831,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>  ? emit(MOV(dst, brw_imm_f(-1.0f)))
>  : emit(MOV(dst, brw_imm_f(1.0f)));
>   inst->predicate = BRW_PREDICATE_NORMAL;
> -
> - if (instr->dest.saturate)
> -inst->saturate = true;
> -
> } else if (type_sz(op[0].type) < 8) {
>   /* AND(val, 0x8000) gives the sign bit.
>*
> @@ -1849,11 +1846,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>   inst = emit(OR(dst, src_reg(dst), brw_imm_ud(0x3f80u)));
>   inst->predicate = BRW_PREDICATE_NORMAL;
>   dst.type = BRW_REGISTER_TYPE_F;
> -
> - if (instr->dest.saturate) {
> -inst = emit(MOV(dst, src_reg(dst)));
> -inst->saturate = true;
> - }
>} else {
>   /* For doubles we do the same but we need to consider:
>*
> @@ -1886,7 +1878,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>   /* Now convert the result from float to double */
>   emit_conversion_to_double(dst, retype(src_reg(tmp),
> BRW_REGISTER_TYPE_F),
> -   instr->dest.saturate);
> +   false);
>}
>break;
>
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/11] nir/algebraic: Simplify fsat of fsign

2018-10-08 Thread Thomas Helland
Den tir. 11. sep. 2018 kl. 01:30 skrev Ian Romanick :
>
> From: Ian Romanick 
>
> These allows us to not support fsign.sat in the Intel compiler backend,
> and that will simplify some later changes.
>
> No shader-db changes on any Intel platform.
>

I was a bit skeptical about how this would impact platforms
other than Intel, but I've settled on it being a wash.

Reviewed-by: Thomas Helland

> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index 3267e93a583..422a8794d38 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -329,6 +329,7 @@ optimizations = [
> (('imax', a, ('ineg', a)), ('iabs', a)),
> (('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
> (('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
> +   (('fsat', ('fsign', a)), ('b2f', ('flt', 0.0, a))),
> (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
> (('fsat', ('fsat', a)), ('fsat', a)),
> (('fmin', ('fmax', ('fmin', ('fmax', a, b), c), b), c), ('fmin', ('fmax', 
> a, b), c)),
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/11] nir/algebraic: sign(x)*x*x is abs(x)*x

2018-10-08 Thread Thomas Helland
Den tir. 11. sep. 2018 kl. 01:30 skrev Ian Romanick :
>
> From: Ian Romanick 
>

Reviewed-by: Thomas Helland

> shader-db results:
>
> All Gen7+ platforms had similar results. (Skylake shown)
> total instructions in shared programs: 15106023 -> 15105981 (<.01%)
> instructions in affected programs: 300 -> 258 (-14.00%)
> helped: 6
> HURT: 0
> helped stats (abs) min: 7 max: 7 x̄: 7.00 x̃: 7
> helped stats (rel) min: 14.00% max: 14.00% x̄: 14.00% x̃: 14.00%
> 95% mean confidence interval for instructions value: -7.00 -7.00
> 95% mean confidence interval for instructions %-change: -14.00% -14.00%
> Instructions are helped.
>
> total cycles in shared programs: 566050327 -> 566050075 (<.01%)
> cycles in affected programs: 2826 -> 2574 (-8.92%)
> helped: 6
> HURT: 0
> helped stats (abs) min: 40 max: 44 x̄: 42.00 x̃: 42
> helped stats (rel) min: 8.89% max: 8.94% x̄: 8.92% x̃: 8.92%
> 95% mean confidence interval for cycles value: -44.30 -39.70
> 95% mean confidence interval for cycles %-change: -8.95% -8.88%
> Cycles are helped.
>
> No changes on Gen6 or earlier.
>
> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index ae1261f8744..3267e93a583 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -105,6 +105,11 @@ optimizations = [
> (('imul', a, 1), a),
> (('fmul', a, -1.0), ('fneg', a)),
> (('imul', a, -1), ('ineg', a)),
> +   # If a < 0: fsign(a)*a*a => -1*a*a => -a*a => abs(a)*a
> +   # If a > 0: fsign(a)*a*a => 1*a*a => a*a => abs(a)*a
> +   # If a == 0: fsign(a)*a*a => 0*0*0 => abs(0)*0
> +   (('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)),
> +   (('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)),
> (('~ffma', 0.0, a, b), b),
> (('~ffma', a, 0.0, b), b),
> (('~ffma', a, b, 0.0), ('fmul', a, b)),
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RESEND PATCH 0/5] i965: More cmod propagation

2018-09-26 Thread Thomas Helland
Hi Ian,

Do you have these in a branch somewhere?
Do you also have a branch somewhere of the PRE for compares?
I'll try to have a look at these, and the sign(x)*y series this evening.
I will probably only be able to review the simplest patches,
but something is probably better than nothing, I guess.

- Thomas

Den tir. 11. sep. 2018 kl. 01:32 skrev Ian Romanick :
>
> Bump
>
> On 08/29/2018 11:40 AM, Ian Romanick wrote:
> > This is mostly a resend of a series that I originally sent out around
> > the end of June.  I updated some of the shader-db results, and I dropped
> > one patch (i965/fs: Allow Boolean conditions in CSEL generation).  I
> > decided that I want to try to acomplish that with a different method.
> > That's going to take a bit more work, and I didn't want to hold up the
> > rest of the series.
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] util/dynarray: add a clone function

2018-08-15 Thread Thomas Helland
2018-08-15 23:56 GMT+02:00 Caio Marcelo de Oliveira Filho
:
> ---
>  src/util/u_dynarray.h | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h
> index dcbbc06d161..4920fe04b67 100644
> --- a/src/util/u_dynarray.h
> +++ b/src/util/u_dynarray.h
> @@ -102,6 +102,15 @@ util_dynarray_resize(struct util_dynarray *buf, unsigned 
> newsize)
> return p;
>  }
>
> +static inline void
> +util_dynarray_clone(struct util_dynarray *buf, struct util_dynarray *mem_ctx,

I think we should keep the mem_ctx a void *.
Apart from that this patch is:

Reviewed-by: Thomas Helland 

> +struct util_dynarray *from_buf)
> +{
> +   util_dynarray_init(buf, mem_ctx);
> +   util_dynarray_resize(buf, from_buf->size);
> +   memcpy(buf->data, from_buf->data, from_buf->size);
> +}
> +
>  static inline void *
>  util_dynarray_grow(struct util_dynarray *buf, int diff)
>  {
> --
> 2.18.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/13] nir: Transform expressions of b2f(a) and b2f(b) to a || b

2018-08-03 Thread Thomas Helland
With the correction of the fmin/fmax pattern in this one and patch 10,
and the inot version of the pattern in patches 9 and 11,
patches 7 and 9-11 are:

Reviewed-by: Thomas Helland 

I think that should be the whole series.

2018-08-03 1:38 GMT+02:00 Ian Romanick :
> On 08/02/2018 02:10 PM, Thomas Helland wrote:
>> 2018-08-02 20:19 GMT+02:00 Ian Romanick :
>>> From: Ian Romanick 
>>>
>>> All Gen6+ platforms had pretty similar results. (Skylake shown)
>>> total instructions in shared programs: 14277184 -> 14276964 (<.01%)
>>> instructions in affected programs: 10082 -> 9862 (-2.18%)
>>> helped: 37
>>> HURT: 1
>>> helped stats (abs) min: 1 max: 30 x̄: 5.97 x̃: 4
>>> helped stats (rel) min: 0.14% max: 16.00% x̄: 5.23% x̃: 2.04%
>>> HURT stats (abs)   min: 1 max: 1 x̄: 1.00 x̃: 1
>>> HURT stats (rel)   min: 0.70% max: 0.70% x̄: 0.70% x̃: 0.70%
>>> 95% mean confidence interval for instructions value: -7.87 -3.71
>>> 95% mean confidence interval for instructions %-change: -6.98% -3.16%
>>> Instructions are helped.
>>>
>>> total cycles in shared programs: 532577990 -> 532577062 (<.01%)
>>> cycles in affected programs: 170959 -> 170031 (-0.54%)
>>> helped: 33
>>> HURT: 9
>>> helped stats (abs) min: 2 max: 120 x̄: 30.91 x̃: 30
>>> helped stats (rel) min: 0.02% max: 7.65% x̄: 2.66% x̃: 1.13%
>>> HURT stats (abs)   min: 2 max: 24 x̄: 10.22 x̃: 8
>>> HURT stats (rel)   min: 0.09% max: 1.79% x̄: 0.61% x̃: 0.22%
>>> 95% mean confidence interval for cycles value: -31.23 -12.96
>>> 95% mean confidence interval for cycles %-change: -2.90% -1.02%
>>> Cycles are helped.
>>>
>>> Iron Lake and GM45 had similar results. (Iron Lake shown)
>>> total instructions in shared programs: 7781539 -> 7781301 (<.01%)
>>> instructions in affected programs: 10169 -> 9931 (-2.34%)
>>> helped: 32
>>> HURT: 0
>>> helped stats (abs) min: 2 max: 20 x̄: 7.44 x̃: 6
>>> helped stats (rel) min: 0.47% max: 17.02% x̄: 4.03% x̃: 1.88%
>>> 95% mean confidence interval for instructions value: -9.53 -5.34
>>> 95% mean confidence interval for instructions %-change: -5.94% -2.12%
>>> Instructions are helped.
>>>
>>> total cycles in shared programs: 177878590 -> 177878932 (<.01%)
>>> cycles in affected programs: 78706 -> 79048 (0.43%)
>>> helped: 7
>>> HURT: 21
>>> helped stats (abs) min: 6 max: 34 x̄: 24.57 x̃: 28
>>> helped stats (rel) min: 0.15% max: 8.33% x̄: 4.66% x̃: 6.37%
>>> HURT stats (abs)   min: 2 max: 86 x̄: 24.48 x̃: 22
>>> HURT stats (rel)   min: 0.01% max: 4.28% x̄: 1.21% x̃: 0.70%
>>> 95% mean confidence interval for cycles value: 0.30 24.13
>>> 95% mean confidence interval for cycles %-change: -1.52% 1.01%
>>> Inconclusive result (%-change mean confidence interval includes 0).
>>>
>>> Signed-off-by: Ian Romanick 
>>> ---
>>>  src/compiler/nir/nir_opt_algebraic.py | 19 +++
>>>  1 file changed, 19 insertions(+)
>>>
>>> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
>>> b/src/compiler/nir/nir_opt_algebraic.py
>>> index 8300d6d01f5..4d60467fbac 100644
>>> --- a/src/compiler/nir/nir_opt_algebraic.py
>>> +++ b/src/compiler/nir/nir_opt_algebraic.py
>>> @@ -166,6 +166,25 @@ optimizations = [
>>>
>>> (('fge', ('fneg', ('b2f', a)), 0.0), ('inot', a)),
>>>
>>> +   (('fne', ('fadd', ('b2f', a), ('b2f', b)), 0.0), ('ior', a, b)),
>>> +   (('fne', ('fmin', ('b2f', a), ('b2f', b)), 0.0), ('ior', a, b)),
>>
>> Maybe I'm just too sleepy, but shouldn't this be either
>>
>> (('fne', ('fmax', ('b2f', a), ('b2f', b)), 0.0), ('ior', a, b)),
>>
>> or
>>
>> (('fne', ('fmin', ('b2f', a), ('b2f', b)), 0.0), ('iand', a, b)),
>
> Yes... which means that patch 10 is also incorrect.
>
>>> +   (('fne', ('bcsel', a, 1.0, ('b2f', b))   , 0.0), ('ior', a, b)),
>>> +   (('fne', ('b2f', a), ('fneg', ('b2f', b))),  ('ior', a, b)),
>>> +
>>> +   # -(b2f(a) + b2f(b)) < 0
>>> +   # 0 < b2f(a) + b2f(b)
>>> +   # 0 != b2f(a) + b2f(b)   b2f must be 0 or 1, so the sum is 
>>> non-negative
>>> +   # a || b
>>> +   (('flt', ('fneg', ('fadd', ('b2f', a), ('b2f', b))), 0.0), ('ior', a, 
>>> b)),
>>> +   (('flt', 0.0, ('fadd', ('b2f', a), ('b2f', b))), ('ior', a, b)),
>>> +
>>> +   # Some optimizations (below) convert things like (a &l

Re: [Mesa-dev] [PATCH 06/13] nir: Transform -fabs(a) < 0 to a != 0

2018-08-02 Thread Thomas Helland
2018-08-02 23:05 GMT+02:00 Thomas Helland :
> 2018-08-02 20:19 GMT+02:00 Ian Romanick :
>> From: Ian Romanick 
>>
>> Unlike the much older -abs(a) >= 0.0 transformation, this is not
>> precise.  The behavior changes if a is NaN.
>>
>> All Gen platforms had pretty similar results. (Skylake shown)
>> total instructions in shared programs: 14277216 -> 14277184 (<.01%)
>> instructions in affected programs: 2300 -> 2268 (-1.39%)
>> helped: 8
>> HURT: 0
>> helped stats (abs) min: 1 max: 8 x̄: 4.00 x̃: 3
>> helped stats (rel) min: 0.48% max: 15.15% x̄: 4.41% x̃: 1.01%
>> 95% mean confidence interval for instructions value: -6.45 -1.55
>> 95% mean confidence interval for instructions %-change: -9.96% 1.13%
>> Inconclusive result (%-change mean confidence interval includes 0).
>>
>> total cycles in shared programs: 532577848 -> 532577990 (<.01%)
>> cycles in affected programs: 17486 -> 17628 (0.81%)
>> helped: 2
>> HURT: 5
>> helped stats (abs) min: 2 max: 6 x̄: 4.00 x̃: 4
>> helped stats (rel) min: 0.06% max: 1.81% x̄: 0.93% x̃: 0.93%
>> HURT stats (abs)   min: 6 max: 50 x̄: 30.00 x̃: 26
>> HURT stats (rel)   min: 0.55% max: 2.17% x̄: 1.19% x̃: 1.02%
>> 95% mean confidence interval for cycles value: -1.06 41.63
>> 95% mean confidence interval for cycles %-change: -0.58% 1.74%
>> Inconclusive result (value mean confidence interval includes 0).
>>
>> Signed-off-by: Ian Romanick 
>> ---
>>  src/compiler/nir/nir_opt_algebraic.py | 4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
>> b/src/compiler/nir/nir_opt_algebraic.py
>> index b96a7f93c08..8300d6d01f5 100644
>> --- a/src/compiler/nir/nir_opt_algebraic.py
>> +++ b/src/compiler/nir/nir_opt_algebraic.py
>> @@ -212,6 +212,10 @@ optimizations = [
>> # a != 0.0
>> (('~flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
>>
>> +   # -fabs(a) < 0.0
>> +   # fabs(a) > 0.0
>> +   (('~flt', ('fneg', ('fabs', a)), 0.0), ('fne', a, 0.0)),
>> +
>
> I'm not sure if the machinery behind the algebraic opts
> sees the fneg and propagates it? It seems the second comment
> here would be better fit for the optimization just above?
>

Oh goodness. Forget my comment. It's obviously showing
a step-by-step proof.

> Either way though, for patch 1-6
>
> Reviewed-by: Thomas Helland
>
>> (('fmax',('b2f(is_used_once)', a),   
>> ('b2f', b)),   ('b2f', ('ior', a, b))),
>> (('fmax', ('fneg(is_used_once)', ('b2f(is_used_once)', a)), ('fneg', 
>> ('b2f', b))), ('fneg', ('b2f', ('ior', a, b)))),
>> (('fmin',('b2f(is_used_once)', a),   
>> ('b2f', b)),   ('b2f', ('iand', a, b))),
>> --
>> 2.14.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/13] nir: Transform expressions of b2f(a) and b2f(b) to a == b

2018-08-02 Thread Thomas Helland
Patches 12 and 13 are:

Reviewed-by: Thomas Helland

2018-08-02 20:19 GMT+02:00 Ian Romanick :
> From: Ian Romanick 
>
> All Gen7+ platforms had similar results. (Skylake shown)
> total instructions in shared programs: 14276886 -> 14276838 (<.01%)
> instructions in affected programs: 312 -> 264 (-15.38%)
> helped: 2
> HURT: 0
>
> total cycles in shared programs: 532578395 -> 532570985 (<.01%)
> cycles in affected programs: 682562 -> 675152 (-1.09%)
> helped: 374
> HURT: 4
> helped stats (abs) min: 2 max: 200 x̄: 20.39 x̃: 18
> helped stats (rel) min: 0.07% max: 11.64% x̄: 1.25% x̃: 1.28%
> HURT stats (abs)   min: 2 max: 114 x̄: 53.50 x̃: 49
> HURT stats (rel)   min: 0.06% max: 11.70% x̄: 5.02% x̃: 4.15%
> 95% mean confidence interval for cycles value: -21.30 -17.91
> 95% mean confidence interval for cycles %-change: -1.30% -1.06%
> Cycles are helped.
>
> Sandy Bridge
> total instructions in shared programs: 10488123 -> 10488075 (<.01%)
> instructions in affected programs: 336 -> 288 (-14.29%)
> helped: 2
> HURT: 0
>
> total cycles in shared programs: 150260379 -> 150260439 (<.01%)
> cycles in affected programs: 4726 -> 4786 (1.27%)
> helped: 0
> HURT: 2
>
> No changes on Iron Lake or GM45.
>
> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index 87e370e891a..e7e5aeaed04 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -183,6 +183,9 @@ optimizations = [
> (('feq', ('fmul', ('b2f', a), ('b2f', b)), 0.0), ('inot', ('iand', a, 
> b))),
> (('feq', ('fmax', ('b2f', a), ('b2f', b)), 0.0), ('inot', ('iand', a, 
> b))),
> (('feq', ('bcsel', a, ('b2f', b), 0.0)   , 0.0), ('inot', ('iand', a, 
> b))),
> +   (('feq', ('fadd', ('b2f', a), ('fneg', ('b2f', b))), 0.0), ('ieq', a, b)),
> +   (('feq',  ('b2f', a) ,  ('b2f', b) ),  ('ieq', a, b)),
> +   (('feq', ('fneg', ('b2f', a)), ('fneg', ('b2f', b))),  ('ieq', a, b)),
>
> # -(b2f(a) + b2f(b)) < 0
> # 0 < b2f(a) + b2f(b)
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/13] nir: Transform -fabs(a) >= 0 to a == 0

2018-08-02 Thread Thomas Helland
This patch is:

Reviewed-by: Thomas Helland

2018-08-02 20:19 GMT+02:00 Ian Romanick :
> From: Ian Romanick 
>
> All Gen platforms had pretty similar results. (Skylake shown)
> total instructions in shared programs: 14276964 -> 14276961 (<.01%)
> instructions in affected programs: 411 -> 408 (-0.73%)
> helped: 3
> HURT: 0
> helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
> helped stats (rel) min: 0.47% max: 1.96% x̄: 1.04% x̃: 0.68%
>
> total cycles in shared programs: 532577062 -> 532577068 (<.01%)
> cycles in affected programs: 1093 -> 1099 (0.55%)
> helped: 1
> HURT: 1
> helped stats (abs) min: 16 max: 16 x̄: 16.00 x̃: 16
> helped stats (rel) min: 7.77% max: 7.77% x̄: 7.77% x̃: 7.77%
> HURT stats (abs)   min: 22 max: 22 x̄: 22.00 x̃: 22
> HURT stats (rel)   min: 2.48% max: 2.48% x̄: 2.48% x̃: 2.48%
>
> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index 4d60467fbac..e1e2ef09845 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -235,6 +235,15 @@ optimizations = [
> # fabs(a) > 0.0
> (('~flt', ('fneg', ('fabs', a)), 0.0), ('fne', a, 0.0)),
>
> +   # 0.0 >= fabs(a)
> +   # 0.0 == fabs(a)   because fabs(a) must be >= 0
> +   # 0.0 == a
> +   (('fge', 0.0, ('fabs', a)), ('feq', a, 0.0)),
> +
> +   # -fabs(a) >= 0.0
> +   # 0.0 >= fabs(a)
> +   (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
> +
> (('fmax',('b2f(is_used_once)', a),   
> ('b2f', b)),   ('b2f', ('ior', a, b))),
> (('fmax', ('fneg(is_used_once)', ('b2f(is_used_once)', a)), ('fneg', 
> ('b2f', b))), ('fneg', ('b2f', ('ior', a, b)))),
> (('fmin',('b2f(is_used_once)', a),   
> ('b2f', b)),   ('b2f', ('iand', a, b))),
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/13] nir: Transform expressions of b2f(a) and b2f(b) to a || b

2018-08-02 Thread Thomas Helland
2018-08-02 20:19 GMT+02:00 Ian Romanick :
> From: Ian Romanick 
>
> All Gen6+ platforms had pretty similar results. (Skylake shown)
> total instructions in shared programs: 14277184 -> 14276964 (<.01%)
> instructions in affected programs: 10082 -> 9862 (-2.18%)
> helped: 37
> HURT: 1
> helped stats (abs) min: 1 max: 30 x̄: 5.97 x̃: 4
> helped stats (rel) min: 0.14% max: 16.00% x̄: 5.23% x̃: 2.04%
> HURT stats (abs)   min: 1 max: 1 x̄: 1.00 x̃: 1
> HURT stats (rel)   min: 0.70% max: 0.70% x̄: 0.70% x̃: 0.70%
> 95% mean confidence interval for instructions value: -7.87 -3.71
> 95% mean confidence interval for instructions %-change: -6.98% -3.16%
> Instructions are helped.
>
> total cycles in shared programs: 532577990 -> 532577062 (<.01%)
> cycles in affected programs: 170959 -> 170031 (-0.54%)
> helped: 33
> HURT: 9
> helped stats (abs) min: 2 max: 120 x̄: 30.91 x̃: 30
> helped stats (rel) min: 0.02% max: 7.65% x̄: 2.66% x̃: 1.13%
> HURT stats (abs)   min: 2 max: 24 x̄: 10.22 x̃: 8
> HURT stats (rel)   min: 0.09% max: 1.79% x̄: 0.61% x̃: 0.22%
> 95% mean confidence interval for cycles value: -31.23 -12.96
> 95% mean confidence interval for cycles %-change: -2.90% -1.02%
> Cycles are helped.
>
> Iron Lake and GM45 had similar results. (Iron Lake shown)
> total instructions in shared programs: 7781539 -> 7781301 (<.01%)
> instructions in affected programs: 10169 -> 9931 (-2.34%)
> helped: 32
> HURT: 0
> helped stats (abs) min: 2 max: 20 x̄: 7.44 x̃: 6
> helped stats (rel) min: 0.47% max: 17.02% x̄: 4.03% x̃: 1.88%
> 95% mean confidence interval for instructions value: -9.53 -5.34
> 95% mean confidence interval for instructions %-change: -5.94% -2.12%
> Instructions are helped.
>
> total cycles in shared programs: 177878590 -> 177878932 (<.01%)
> cycles in affected programs: 78706 -> 79048 (0.43%)
> helped: 7
> HURT: 21
> helped stats (abs) min: 6 max: 34 x̄: 24.57 x̃: 28
> helped stats (rel) min: 0.15% max: 8.33% x̄: 4.66% x̃: 6.37%
> HURT stats (abs)   min: 2 max: 86 x̄: 24.48 x̃: 22
> HURT stats (rel)   min: 0.01% max: 4.28% x̄: 1.21% x̃: 0.70%
> 95% mean confidence interval for cycles value: 0.30 24.13
> 95% mean confidence interval for cycles %-change: -1.52% 1.01%
> Inconclusive result (%-change mean confidence interval includes 0).
>
> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 19 +++
>  1 file changed, 19 insertions(+)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index 8300d6d01f5..4d60467fbac 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -166,6 +166,25 @@ optimizations = [
>
> (('fge', ('fneg', ('b2f', a)), 0.0), ('inot', a)),
>
> +   (('fne', ('fadd', ('b2f', a), ('b2f', b)), 0.0), ('ior', a, b)),
> +   (('fne', ('fmin', ('b2f', a), ('b2f', b)), 0.0), ('ior', a, b)),

Maybe I'm just too sleepy, but shouldn't this be either

(('fne', ('fmax', ('b2f', a), ('b2f', b)), 0.0), ('ior', a, b)),

or

(('fne', ('fmin', ('b2f', a), ('b2f', b)), 0.0), ('iand', a, b)),


> +   (('fne', ('bcsel', a, 1.0, ('b2f', b))   , 0.0), ('ior', a, b)),
> +   (('fne', ('b2f', a), ('fneg', ('b2f', b))),  ('ior', a, b)),
> +
> +   # -(b2f(a) + b2f(b)) < 0
> +   # 0 < b2f(a) + b2f(b)
> +   # 0 != b2f(a) + b2f(b)   b2f must be 0 or 1, so the sum is 
> non-negative
> +   # a || b
> +   (('flt', ('fneg', ('fadd', ('b2f', a), ('b2f', b))), 0.0), ('ior', a, b)),
> +   (('flt', 0.0, ('fadd', ('b2f', a), ('b2f', b))), ('ior', a, b)),
> +
> +   # Some optimizations (below) convert things like (a < b || c < b) into
> +   # (min(a, c) < b).  However, this interferes with the previous 
> optimizations
> +   # that try to remove comparisons with negated sums of b2f.  This just
> +   # breaks that apart.
> +   (('flt', ('fmin', c, ('fneg', ('fadd', ('b2f', a), ('b2f', b)))), 0.0),
> +('ior', ('flt', c, 0.0), ('ior', a, b))),
> +
> (('~flt', ('fadd', a, b), a), ('flt', b, 0.0)),
> (('~fge', ('fadd', a, b), a), ('fge', b, 0.0)),
> (('~feq', ('fadd', a, b), a), ('feq', b, 0.0)),
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/13] nir: Transform -fabs(a) < 0 to a != 0

2018-08-02 Thread Thomas Helland
2018-08-02 20:19 GMT+02:00 Ian Romanick :
> From: Ian Romanick 
>
> Unlike the much older -abs(a) >= 0.0 transformation, this is not
> precise.  The behavior changes if a is NaN.
>
> All Gen platforms had pretty similar results. (Skylake shown)
> total instructions in shared programs: 14277216 -> 14277184 (<.01%)
> instructions in affected programs: 2300 -> 2268 (-1.39%)
> helped: 8
> HURT: 0
> helped stats (abs) min: 1 max: 8 x̄: 4.00 x̃: 3
> helped stats (rel) min: 0.48% max: 15.15% x̄: 4.41% x̃: 1.01%
> 95% mean confidence interval for instructions value: -6.45 -1.55
> 95% mean confidence interval for instructions %-change: -9.96% 1.13%
> Inconclusive result (%-change mean confidence interval includes 0).
>
> total cycles in shared programs: 532577848 -> 532577990 (<.01%)
> cycles in affected programs: 17486 -> 17628 (0.81%)
> helped: 2
> HURT: 5
> helped stats (abs) min: 2 max: 6 x̄: 4.00 x̃: 4
> helped stats (rel) min: 0.06% max: 1.81% x̄: 0.93% x̃: 0.93%
> HURT stats (abs)   min: 6 max: 50 x̄: 30.00 x̃: 26
> HURT stats (rel)   min: 0.55% max: 2.17% x̄: 1.19% x̃: 1.02%
> 95% mean confidence interval for cycles value: -1.06 41.63
> 95% mean confidence interval for cycles %-change: -0.58% 1.74%
> Inconclusive result (value mean confidence interval includes 0).
>
> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index b96a7f93c08..8300d6d01f5 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -212,6 +212,10 @@ optimizations = [
> # a != 0.0
> (('~flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
>
> +   # -fabs(a) < 0.0
> +   # fabs(a) > 0.0
> +   (('~flt', ('fneg', ('fabs', a)), 0.0), ('fne', a, 0.0)),
> +

I'm not sure if the machinery behind the algebraic opts
sees the fneg and propagates it? It seems the second comment
here would be a better fit for the optimization just above?

Either way though, for patch 1-6

Reviewed-by: Thomas Helland

> (('fmax',('b2f(is_used_once)', a),   
> ('b2f', b)),   ('b2f', ('ior', a, b))),
> (('fmax', ('fneg(is_used_once)', ('b2f(is_used_once)', a)), ('fneg', 
> ('b2f', b))), ('fneg', ('b2f', ('ior', a, b)))),
> (('fmin',('b2f(is_used_once)', a),   
> ('b2f', b)),   ('b2f', ('iand', a, b))),
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 3/3] glsl: propagate full variables eagerly

2018-07-26 Thread Thomas Helland
Since we're always going top-down through the
program there should be no need to "walk backwards",
so this approach should be enough to get the whole chain
of assignments in one pass. Neat.

Reviewed-by: Thomas Helland

2018-07-25 3:03 GMT+02:00 Caio Marcelo de Oliveira Filho
:
> When creating a new acp_entry after an assignment "c = b", check if b
> itself has an acp_entry with a full variable associated and use
> that. This reduces the number of passes the algorithm needs to
> propagate a value in a chain of assignments.
>
> I've tried to make a similar change to the write_partial, but it
> caused noise in the final output (hurting instruction count). The
> reason is that, for partials, a propagation might imply a swizzle
> operation.
>
> We could later investigate whether it is worth restricting the cases where
> we are eager, to avoid making things worse because of swizzling.
> ---
>  .../glsl/opt_copy_propagation_elements.cpp| 23 ++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/glsl/opt_copy_propagation_elements.cpp 
> b/src/compiler/glsl/opt_copy_propagation_elements.cpp
> index cae6d3c0707..c44f7c56f11 100644
> --- a/src/compiler/glsl/opt_copy_propagation_elements.cpp
> +++ b/src/compiler/glsl/opt_copy_propagation_elements.cpp
> @@ -169,8 +169,29 @@ public:
>   }
>}
>
> +  /* If the rhs has an acp_entry pointing to another full variable, use
> +   * that. This allows propagation to happen all in one pass, instead of
> +   * having the value walking slowly. E.g.
> +   *
> +   * b = a
> +   * c = b
> +   * d = c
> +   * use(d)
> +   *
> +   * will need one pass to propagate to
> +   *
> +   * b = a
> +   * c = a// Because of b acp_entry.
> +   * d = a// Because of c acp_entry that uses 'a' directly.
> +   * use(a)   // Because of d acp_entry that uses 'a' directly.
> +   */
> +  acp_entry *rhs_entry = read(rhs);
> +  if (rhs_entry && rhs_entry->rhs_full != NULL) {
> + rhs = rhs_entry->rhs_full;
> +  }
> +  rhs_entry = pull_acp(rhs);
> +
>lhs_entry->rhs_full = rhs;
> -  acp_entry *rhs_entry = pull_acp(rhs);
>_mesa_set_add(rhs_entry->dsts, lhs);
>
>if (lhs->type->is_vector()) {
> --
> 2.18.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 2/3] glsl: use only copy_propagation_elements

2018-07-26 Thread Thomas Helland
This is:
Reviewed-by: Thomas Helland

2018-07-25 3:03 GMT+02:00 Caio Marcelo de Oliveira Filho
:
> Now that the elements version handles both cases, remove the
> non-elements version.
>
> Reviewed-by: Eric Anholt 
> ---
>  src/compiler/Makefile.sources  |   1 -
>  src/compiler/glsl/glsl_parser_extras.cpp   |   1 -
>  src/compiler/glsl/ir_optimization.h|   1 -
>  src/compiler/glsl/meson.build  |   1 -
>  src/compiler/glsl/opt_copy_propagation.cpp | 369 -
>  src/compiler/glsl/test_optpass.cpp |   2 -
>  6 files changed, 375 deletions(-)
>  delete mode 100644 src/compiler/glsl/opt_copy_propagation.cpp
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index cc147218c4e..908508adffb 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -129,7 +129,6 @@ LIBGLSL_FILES = \
> glsl/opt_constant_folding.cpp \
> glsl/opt_constant_propagation.cpp \
> glsl/opt_constant_variable.cpp \
> -   glsl/opt_copy_propagation.cpp \
> glsl/opt_copy_propagation_elements.cpp \
> glsl/opt_dead_builtin_variables.cpp \
> glsl/opt_dead_builtin_varyings.cpp \
> diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
> b/src/compiler/glsl/glsl_parser_extras.cpp
> index 04eba980e0e..6d92f24ea22 100644
> --- a/src/compiler/glsl/glsl_parser_extras.cpp
> +++ b/src/compiler/glsl/glsl_parser_extras.cpp
> @@ -2214,7 +2214,6 @@ do_common_optimization(exec_list *ir, bool linked,
> OPT(do_if_simplification, ir);
> OPT(opt_flatten_nested_if_blocks, ir);
> OPT(opt_conditional_discard, ir);
> -   OPT(do_copy_propagation, ir);
> OPT(do_copy_propagation_elements, ir);
>
> if (options->OptimizeForAOS && !linked)
> diff --git a/src/compiler/glsl/ir_optimization.h 
> b/src/compiler/glsl/ir_optimization.h
> index b0e84608c58..ef68b93c09e 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -103,7 +103,6 @@ bool opt_conditional_discard(exec_list *instructions);
>  bool do_constant_folding(exec_list *instructions);
>  bool do_constant_variable(exec_list *instructions);
>  bool do_constant_variable_unlinked(exec_list *instructions);
> -bool do_copy_propagation(exec_list *instructions);
>  bool do_copy_propagation_elements(exec_list *instructions);
>  bool do_constant_propagation(exec_list *instructions);
>  void do_dead_builtin_varyings(struct gl_context *ctx,
> diff --git a/src/compiler/glsl/meson.build b/src/compiler/glsl/meson.build
> index 96536b80168..09662b20775 100644
> --- a/src/compiler/glsl/meson.build
> +++ b/src/compiler/glsl/meson.build
> @@ -170,7 +170,6 @@ files_libglsl = files(
>'opt_constant_folding.cpp',
>'opt_constant_propagation.cpp',
>'opt_constant_variable.cpp',
> -  'opt_copy_propagation.cpp',
>'opt_copy_propagation_elements.cpp',
>'opt_dead_builtin_variables.cpp',
>'opt_dead_builtin_varyings.cpp',
> diff --git a/src/compiler/glsl/opt_copy_propagation.cpp 
> b/src/compiler/glsl/opt_copy_propagation.cpp
> deleted file mode 100644
> index 206dffe4f1c..00000000000
> --- a/src/compiler/glsl/opt_copy_propagation.cpp
> +++ /dev/null
> @@ -1,369 +0,0 @@
> -/*
> - * Copyright © 2010 Intel Corporation
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice (including the next
> - * paragraph) shall be included in all copies or substantial portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> - * DEALINGS IN THE SOFTWARE.
> - */
> -
> -/**
> - * \file opt_copy_propagation.cpp
> - *
> - * Moves usage of recently-copied variables to the previous copy of
> - * the variable.
> - *
> - * This should reduce the number of MOV instructi

Re: [Mesa-dev] [PATCH v3 1/3] glsl: teach copy_propagation_elements to deal with whole variables

2018-07-26 Thread Thomas Helland
for (int i = 0; i < 4; i++) {
> +if (lhs_entry->rhs_element[i])
> +   remove_from_dsts(lhs_entry->rhs_element[i], lhs);
> + }
> +  }
> +
> +  lhs_entry->rhs_full = rhs;
> +  acp_entry *rhs_entry = pull_acp(rhs);
> +  _mesa_set_add(rhs_entry->dsts, lhs);
> +
> +  if (lhs->type->is_vector()) {
> + for (int i = 0; i < 4; i++) {
> +lhs_entry->rhs_element[i] = rhs;
> +lhs_entry->rhs_channel[i] = i;
> + }
> +  }
> +   }
> +
> void remove_unused_var_from_dsts(acp_entry *lhs_entry, ir_variable *lhs, 
> ir_variable *var)
> {
>if (!var)
> @@ -204,6 +239,14 @@ private:
>return entry;
> }
>
> +   void
> +   remove_from_dsts(ir_variable *var, ir_variable *to_remove)
> +   {
> +  acp_entry *entry = pull_acp(var);
> +  assert(entry);
> +  _mesa_set_remove(entry->dsts, _mesa_set_search(entry->dsts, 
> to_remove));
> +   }
> +

Use the newly added _mesa_set_remove_key?
Apart from that, this looks good to me


Reviewed-by: Thomas Helland


> /** Available Copy to Propagate table, from variable to the entry
>  *  containing the current sources that can be used. */
> hash_table *acp;
> @@ -247,6 +290,8 @@ public:
>ralloc_free(mem_ctx);
> }
>
> +   virtual ir_visitor_status visit(ir_dereference_variable *);
> +
> void handle_loop(ir_loop *, bool keep_acp);
> virtual ir_visitor_status visit_enter(class ir_loop *);
> virtual ir_visitor_status visit_enter(class ir_function_signature *);
> @@ -282,6 +327,21 @@ public:
>
>  } /* unnamed namespace */
>
> +ir_visitor_status
> +ir_copy_propagation_elements_visitor::visit(ir_dereference_variable *ir)
> +{
> +   if (this->in_assignee)
> +  return visit_continue;
> +
> +   const acp_entry *entry = state->read(ir->var);
> +   if (entry && entry->rhs_full) {
> +  ir->var = (ir_variable *) entry->rhs_full;
> +  progress = true;
> +   }
> +
> +   return visit_continue;
> +}
> +
>  ir_visitor_status
>  ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
>  {
> @@ -316,16 +376,14 @@ 
> ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
> ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
> ir_variable *var = ir->lhs->variable_referenced();
>
> -   if (var->type->is_scalar() || var->type->is_vector()) {
> -  kill_entry *k;
> +   kill_entry *k;
>
> -  if (lhs)
> -k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
> -  else
> -k = new(this->lin_ctx) kill_entry(var, ~0);
> +   if (lhs && var->type->is_vector())
> +  k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
> +   else
> +  k = new(this->lin_ctx) kill_entry(var, ~0);
>
> -  kill(k);
> -   }
> +   kill(k);
>
> add_copy(ir);
>
> @@ -460,11 +518,25 @@ 
> ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
>}
> }
>
> -   /* Since we're unlinked, we don't (necessarily) know the side effects of
> -* this call.  So kill all copies.
> -*/
> -   this->state->erase_all();
> -   this->killed_all = true;
> +   if (!ir->callee->is_intrinsic()) {
> +  state->erase_all();
> +  this->killed_all = true;
> +   } else {
> +  if (ir->return_deref) {
> + kill(new(this->lin_ctx) kill_entry(ir->return_deref->var, ~0));
> +  }
> +
> +  foreach_two_lists(formal_node, &ir->callee->parameters,
> +actual_node, &ir->actual_parameters) {
> + ir_variable *sig_param = (ir_variable *) formal_node;
> + if (sig_param->data.mode == ir_var_function_out ||
> + sig_param->data.mode == ir_var_function_inout) {
> +ir_rvalue *ir = (ir_rvalue *) actual_node;
> +ir_variable *var = ir->variable_referenced();
> +kill(new(this->lin_ctx) kill_entry(var, ~0));
> + }
> +  }
> +   }
>
> return visit_continue_with_parent;
>  }
> @@ -585,12 +657,29 @@ ir_copy_propagation_elements_visitor::kill(kill_entry 
> *k)
>  void
>  ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
>  {
> -   int orig_swizzle[4] = {0, 1, 2, 3};
> -   int swizzle[4];
> -
> if (ir->condition)
>return;
>
> +   {
> +  ir_variable *lhs_var = ir->whole_variable_written();
> +  ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
> 

Re: [Mesa-dev] [PATCH 06/12] nir: Add a structure splitting pass

2018-07-26 Thread Thomas Helland
2018-07-26 18:00 GMT+02:00 Jason Ekstrand :
> ---
>  src/compiler/Makefile.sources |   1 +
>  src/compiler/nir/meson.build  |   1 +
>  src/compiler/nir/nir.h|   1 +
>  src/compiler/nir/nir_split_vars.c | 271 ++
>  4 files changed, 274 insertions(+)
>  create mode 100644 src/compiler/nir/nir_split_vars.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index cc147218c4e..144ba94a8c6 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -300,6 +300,7 @@ NIR_FILES = \
> nir/nir_serialize.h \
> nir/nir_split_per_member_structs.c \
> nir/nir_split_var_copies.c \
> +   nir/nir_split_vars.c \
> nir/nir_sweep.c \
> nir/nir_to_lcssa.c \
> nir/nir_validate.c \
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index a1bb19356ce..3fd5535ba52 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -184,6 +184,7 @@ files_libnir = files(
>'nir_serialize.h',
>'nir_split_per_member_structs.c',
>'nir_split_var_copies.c',
> +  'nir_split_vars.c',
>'nir_sweep.c',
>'nir_to_lcssa.c',
>'nir_validate.c',
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 3bfe7d7f7bf..4af7166f25b 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2609,6 +2609,7 @@ int nir_gs_count_vertices(const nir_shader *shader);
>
>  bool nir_split_var_copies(nir_shader *shader);
>  bool nir_split_per_member_structs(nir_shader *shader);
> +bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);
>
>  bool nir_lower_returns_impl(nir_function_impl *impl);
>  bool nir_lower_returns(nir_shader *shader);
> diff --git a/src/compiler/nir/nir_split_vars.c 
> b/src/compiler/nir/nir_split_vars.c
> new file mode 100644
> index 00000000000..1f59ac2f5e7
> --- /dev/null
> +++ b/src/compiler/nir/nir_split_vars.c
> @@ -0,0 +1,271 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +#include "nir_deref.h"
> +
> +struct split_var_state {
> +   void *mem_ctx;
> +
> +   nir_shader *shader;
> +   nir_function_impl *impl;
> +
> +   nir_variable *base_var;
> +};
> +
> +struct field {
> +   struct field *parent;
> +
> +   const struct glsl_type *type;
> +
> +   unsigned num_fields;
> +   struct field *fields;
> +
> +   nir_variable *var;
> +};
> +
> +static const struct glsl_type *
> +wrap_type_in_array(const struct glsl_type *type,
> +   const struct glsl_type *array_type)
> +{
> +   if (!glsl_type_is_array(array_type))
> +  return type;
> +
> +   const struct glsl_type *elem_type =
> +  wrap_type_in_array(type, glsl_get_array_element(array_type));
> +   return glsl_array_type(elem_type, glsl_get_length(array_type));
> +}
> +
> +static void
> +init_field_for_type(struct field *field, struct field *parent,
> +const struct glsl_type *type,
> +const char *name,
> +struct split_var_state *state)
> +{
> +   *field = (struct field) {
> +  .parent = parent,
> +  .type = type,
> +   };
> +
> +   const struct glsl_type *struct_type = glsl_without_array(type);
> +   if (glsl_type_is_struct(struct_type)) {
> +  field->num_fields = glsl_get_length(struct_type),

Should that be a semicolon (not a comma) at the end here?

> +  field->fields = ralloc_array(state->mem_ctx, struct field,
> +   field->num_fields);
> +  for (unsigned i = 0; i < field->num_fields; i++) {
> + char *field_name = NULL;
> + if (name) {
> +ralloc_asprintf(state->mem_ctx, "%s_%s", name,
> +glsl_get_struct_elem_name(struct_type, i));
> + }

Re: [Mesa-dev] [PATCH 05/12] nir/types: Add array_or_matrix helpers

2018-07-26 Thread Thomas Helland
This patch is:

Reviewed-by: Thomas Helland

2018-07-26 18:00 GMT+02:00 Jason Ekstrand :
> ---
>  src/compiler/nir_types.cpp | 15 +++
>  src/compiler/nir_types.h   |  2 ++
>  2 files changed, 17 insertions(+)
>
> diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
> index 3a3864414f3..c6f30368c95 100644
> --- a/src/compiler/nir_types.cpp
> +++ b/src/compiler/nir_types.cpp
> @@ -50,6 +50,15 @@ glsl_without_array(const glsl_type *type)
> return type->without_array();
>  }
>
> +const glsl_type *
> +glsl_without_array_or_matrix(const glsl_type *type)
> +{
> +   type = type->without_array();
> +   if (type->is_matrix())
> +  type = type->column_type();
> +   return type;
> +}
> +
>  const glsl_type *
>  glsl_get_array_instance(const glsl_type *type,
>  unsigned array_size)
> @@ -224,6 +233,12 @@ glsl_type_is_array_of_arrays(const struct glsl_type 
> *type)
> return type->is_array_of_arrays();
>  }
>
> +bool
> +glsl_type_is_array_or_matrix(const struct glsl_type *type)
> +{
> +   return type->is_array() || type->is_matrix();
> +}
> +
>  bool
>  glsl_type_is_struct(const struct glsl_type *type)
>  {
> diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
> index 817b7a9b345..d29edd1d6c5 100644
> --- a/src/compiler/nir_types.h
> +++ b/src/compiler/nir_types.h
> @@ -48,6 +48,7 @@ const struct glsl_type *glsl_get_struct_field(const struct 
> glsl_type *type,
>
>  const struct glsl_type *glsl_get_array_element(const struct glsl_type *type);
>  const struct glsl_type *glsl_without_array(const struct glsl_type *type);
> +const struct glsl_type *glsl_without_array_or_matrix(const struct glsl_type 
> *type);
>  const struct glsl_type *glsl_get_array_instance(const struct glsl_type *type,
>  unsigned array_size);
>
> @@ -131,6 +132,7 @@ bool glsl_type_is_vector_or_scalar(const struct glsl_type 
> *type);
>  bool glsl_type_is_matrix(const struct glsl_type *type);
>  bool glsl_type_is_array(const struct glsl_type *type);
>  bool glsl_type_is_array_of_arrays(const struct glsl_type *type);
> +bool glsl_type_is_array_or_matrix(const struct glsl_type *type);
>  bool glsl_type_is_struct(const struct glsl_type *type);
>  bool glsl_type_is_sampler(const struct glsl_type *type);
>  bool glsl_type_is_image(const struct glsl_type *type);
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/12] nir/print: Remove a bogus assert

2018-07-26 Thread Thomas Helland
This is already fixed in master it seems

2018-07-26 17:59 GMT+02:00 Jason Ekstrand :
> In 1beef89ad85c47fb6, we made a bunch of changes to NIR to allow for
> more than four components.  This assert was added to trigger if we ever
> saw a vec16 input variable.  However, it didn't take into account the
> fact that we can get matrices as input/output variables and they break
> this assumption without breaking the code that follows.
>
> Fixes: 1beef89ad85c4 "nir: prepare for bumping up max components to 16"
> ---
>  src/compiler/nir/nir_print.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
> index 93d1c02f23d..7cb16abd146 100644
> --- a/src/compiler/nir/nir_print.c
> +++ b/src/compiler/nir/nir_print.c
> @@ -491,7 +491,6 @@ print_var_decl(nir_variable *var, print_state *state)
>switch (var->data.mode) {
>case nir_var_shader_in:
>case nir_var_shader_out:
> - assert(num_components <= 4);
>   if (num_components < 4 && num_components != 0) {
>  const char *xyzw = "xyzw";
>  for (int i = 0; i < num_components; i++)
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/12] nir/instr_set: Fix nir_instrs_equal for derefs

2018-07-26 Thread Thomas Helland
This is:
Reviewed-by: Thomas Helland

2018-07-26 18:00 GMT+02:00 Jason Ekstrand :
> We weren't returning at the end of the nir_instr_type_deref case in
> nir_instrs_equal and it was falling through to the default of false.
> While we're at it, make the default unreachable because all statements
> in the switch now have their own returns.  Had we done that before, we
> would have caught this bug a long time ago.
>
> Fixes: 19a4662a540a8c94 "nir: Add a deref instruction type"
> ---
>  src/compiler/nir/nir_instr_set.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/nir/nir_instr_set.c 
> b/src/compiler/nir/nir_instr_set.c
> index 42aa61808ac..19771fcd9dd 100644
> --- a/src/compiler/nir/nir_instr_set.c
> +++ b/src/compiler/nir/nir_instr_set.c
> @@ -357,7 +357,7 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr 
> *instr2)
>default:
>   unreachable("Invalid instruction deref type");
>}
> -  break;
> +  return true;
> }
> case nir_instr_type_tex: {
>nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
> @@ -460,7 +460,7 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr 
> *instr2)
>unreachable("Invalid instruction type");
> }
>
> -   return false;
> +   unreachable("All cases in the above switch should return");
>  }
>
>  static bool
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/12] util/list: Make some helpers take const lists

2018-07-26 Thread Thomas Helland
This is:
Reviewed-by: Thomas Helland

26. jul. 2018 18.04 skrev "Jason Ekstrand" :

They're all just querying things about the list and not mutating
anything.
---
 src/util/list.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/util/list.h b/src/util/list.h
index 6edb7501109..09d1b4cae64 100644
--- a/src/util/list.h
+++ b/src/util/list.h
@@ -72,7 +72,7 @@ static inline void list_addtail(struct list_head *item,
struct list_head *list)
 list->prev = item;
 }

-static inline bool list_empty(struct list_head *list);
+static inline bool list_empty(const struct list_head *list);

 static inline void list_replace(struct list_head *from, struct list_head
*to)
 {
@@ -101,7 +101,7 @@ static inline void list_delinit(struct list_head *item)
 item->prev = item;
 }

-static inline bool list_empty(struct list_head *list)
+static inline bool list_empty(const struct list_head *list)
 {
return list->next == list;
 }
@@ -114,7 +114,7 @@ static inline bool list_is_singular(const struct
list_head *list)
return list->next != NULL && list->next != list && list->next->next ==
list;
 }

-static inline unsigned list_length(struct list_head *list)
+static inline unsigned list_length(const struct list_head *list)
 {
struct list_head *node;
unsigned length = 0;
@@ -145,7 +145,7 @@ static inline void list_splicetail(struct list_head
*src, struct list_head *dst)
dst->prev = src->prev;
 }

-static inline void list_validate(struct list_head *list)
+static inline void list_validate(const struct list_head *list)
 {
struct list_head *node;
assert(list->next->prev == list && list->prev->next == list);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/11] util/string_buffer: fix warning in tests

2018-07-16 Thread Thomas Helland
This patch is

Reviewed-by: Thomas Helland 

Den tir. 17. jul. 2018, 01:40 skrev Caio Marcelo de Oliveira Filho <
caio.olive...@intel.com>:

> And also specify the maximum size when writing to static buffers. The
> warning below refers to the case where "str5" could be larger than
> "str5 - str4", then the strcat would have overlapping dst and src.
>
> Compiler doesn't pick up the bound from the snprintf above, so we make
> clear the bounds of str5 by using strncat() instead of strcat().
>
> ../../src/util/tests/string_buffer/string_buffer_test.cpp: In member
> function ‘virtual void string_buffer_string_buffer_tests_Test::TestBody()’:
> ../../src/util/tests/string_buffer/string_buffer_test.cpp:106:10: warning:
> ‘char* strcat(char*, const char*)’ accessing 81 or more bytes at offsets 48
> and 128 may overlap 1 byte at offset 128 [-Wrestrict]
> strcat(str4, str5);
> ~~^~~~
> ---
>  src/util/tests/string_buffer/string_buffer_test.cpp | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/util/tests/string_buffer/string_buffer_test.cpp
> b/src/util/tests/string_buffer/string_buffer_test.cpp
> index 545f607fadd..afb6dfb2a19 100644
> --- a/src/util/tests/string_buffer/string_buffer_test.cpp
> +++ b/src/util/tests/string_buffer/string_buffer_test.cpp
> @@ -95,15 +95,15 @@ TEST_F(string_buffer, string_buffer_tests)
> EXPECT_TRUE(strlen(buf->buf) == 0);
>
> /* Test a string with some formatting */
> -   sprintf(str4, "Testing formatting %d, %f", 100, 1.0);
> +   snprintf(str4, sizeof(str4), "Testing formatting %d, %f", 100, 1.0);
> EXPECT_TRUE(_mesa_string_buffer_printf(buf, "Testing formatting %d,
> %f", 100, 1.0));
> EXPECT_TRUE(strcmp(buf->buf, str4) == 0);
>
> /* Compile a string with some other formatting */
> -   sprintf(str5, "Testing formatting %d, %x", 100, 0xDEADBEAF);
> +   snprintf(str5, sizeof(str5), "Testing formatting %d, %x", 100,
> 0xDEADBEAF);
>
> /* Concatenate str5 to str4 */
> -   strcat(str4, str5);
> +   strncat(str4, str5, sizeof(str5));
>
> /* Now use the formatted append function again */
> EXPECT_TRUE(_mesa_string_buffer_printf(buf, "Testing formatting %d,
> %x", 100, 0xDEADBEAF));
> --
> 2.18.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glsl: slim the kill_entry struct used in const propagation

2018-07-10 Thread Thomas Helland
2018-07-09 20:59 GMT+02:00 Caio Marcelo de Oliveira Filho
:
> Commit 4654439fdd7 "glsl: Use hash tables for
> opt_constant_propagation() kill sets." uses a hash_table for storing
> kill_entries, so the structs can be simplified.
>
> Remove the exec_node from kill_entry since it is not used in an
> exec_list anymore.
>
> Remove the 'var' from kill_entry since it is now redundant with the
> key of the hash table.
>
> Suggested by Eric Anholt.

These are both:

Reviewed-by: Thomas Helland

> ---
>  src/compiler/glsl/opt_constant_propagation.cpp | 13 +
>  1 file changed, 5 insertions(+), 8 deletions(-)
>
> diff --git a/src/compiler/glsl/opt_constant_propagation.cpp 
> b/src/compiler/glsl/opt_constant_propagation.cpp
> index 05dc71efb72..f91498b45cd 100644
> --- a/src/compiler/glsl/opt_constant_propagation.cpp
> +++ b/src/compiler/glsl/opt_constant_propagation.cpp
> @@ -77,20 +77,17 @@ public:
>  };
>
>
> -class kill_entry : public exec_node
> +class kill_entry
>  {
>  public:
> /* override operator new from exec_node */
> DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry)
>
> -   kill_entry(ir_variable *var, unsigned write_mask)
> +   explicit kill_entry(unsigned write_mask)
> {
> -  assert(var);
> -  this->var = var;
>this->write_mask = write_mask;
> }
>
> -   ir_variable *var;
> unsigned write_mask;
>  };
>
> @@ -386,7 +383,7 @@ 
> ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
> hash_entry *htk;
> hash_table_foreach(new_kills, htk) {
>kill_entry *k = (kill_entry *) htk->data;
> -  kill(k->var, k->write_mask);
> +  kill((ir_variable *) htk->key, k->write_mask);
> }
>  }
>
> @@ -433,7 +430,7 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
> hash_entry *htk;
> hash_table_foreach(new_kills, htk) {
>kill_entry *k = (kill_entry *) htk->data;
> -  kill(k->var, k->write_mask);
> +  kill((ir_variable *) htk->key, k->write_mask);
> }
>
> /* already descended into the children. */
> @@ -469,7 +466,7 @@ ir_constant_propagation_visitor::kill(ir_variable *var, 
> unsigned write_mask)
> }
> /* Not already in the hash table.  Make new entry. */
> _mesa_hash_table_insert(this->kills, var,
> -   new(this->lin_ctx) kill_entry(var, write_mask));
> +   new(this->lin_ctx) kill_entry(write_mask));
>  }
>
>  /**
> --
> 2.18.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/math: Allocate memory for GLmatrix elements and its inverse contiguously

2018-04-19 Thread Thomas Helland
2018-04-19 20:08 GMT+02:00 Vlad Golovkin <vlad.golovkin.m...@gmail.com>:
> -- Forwarded message --
> From: Vlad Golovkin <vlad.golovkin.m...@gmail.com>
> Date: 2018-04-19 21:06 GMT+03:00
> Subject: Re: [Mesa-dev] [PATCH] mesa/math: Allocate memory for
> GLmatrix elements and its inverse contiguously
> To: Thomas Helland <thomashellan...@gmail.com>
>
>
> 2018-04-17 8:55 GMT+03:00 Thomas Helland <thomashellan...@gmail.com>:
>> Hi, and thanks for the patch =)
>>
>> Have you done any performance testing on this to verify it
>> gives us a speedup of any kind? I'm asking because it seems like
>> this might be something that a decent compiler should be able to do.
>> Performance related patches, at least in core mesa, usually have
>> some justification with benchmark numbers in the commit message.
>
> Hi,
> I examined the resulting assembly for these 3 functions and it turns
> out that compiler wasn't merging these two blocks of memory into one
> (which compiler does that?).
> gcc tried to unroll memcpys to a series of movs which may seem to
> partially defeat the purpose of this patch, but after copying the
> block corresponding to m->m it had to switch destination and source
> registers to the next block resulting in 2 wasted movs.
> As a result we can save malloc and free call (in _math_matrix_ctr and
> _math_matrix_dtr) and 2 movs (when compiler tries to avoid memcpy -
> best case) or 1 memcpy call (in the worst case). It may seem that 2nd
> malloc can place m->inv in memory right after m->m but: 1) compiler
> can't rely on that behaviour 2) allocator will insert some private
> data before each block leading to more cache misses.
> I made some testing with Torcs and Yo Frankie blender game and
> according to perf in Yo Frankie _math_matrix_copy overhead reduced by
> 0.03% - 0.04% while Torcs didn't see any improvement.
>

Good analysis! While the gains are not huge, it's probably worthwhile.
With some of the comments addressed this has my RB.
I'll pull it down this weekend, and add the comments if you don't
beat me to it, and then I'll push with my RB once we are past the
18.1 branching. Thanks for the patch =)

> Sorry for the duplicate emails.
>
>> Some style comments below
>>
>> 2018-04-17 1:03 GMT+02:00 Vlad Golovkin <vlad.golovkin.m...@gmail.com>:
>>> When GLmatrix elements and its inverse are stored contiguously in memory it 
>>> is possible to
>>> allocate, free and copy these fields with 1 function call instead of 2.
>>> ---
>>>  src/mesa/math/m_matrix.c | 15 +--
>>>  1 file changed, 9 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c
>>> index 57a49533de..4ab78a1fb3 100644
>>> --- a/src/mesa/math/m_matrix.c
>>> +++ b/src/mesa/math/m_matrix.c
>>> @@ -1438,8 +1438,7 @@ _math_matrix_is_dirty( const GLmatrix *m )
>>>  void
>>>  _math_matrix_copy( GLmatrix *to, const GLmatrix *from )
>>>  {
>>> -   memcpy(to->m, from->m, 16 * sizeof(GLfloat));
>>> -   memcpy(to->inv, from->inv, 16 * sizeof(GLfloat));
>>> +   memcpy(to->m, from->m, 16 * 2 * sizeof(GLfloat));
>>> to->flags = from->flags;
>>> to->type = from->type;
>>>  }
>>> @@ -1470,12 +1469,17 @@ _math_matrix_loadf( GLmatrix *mat, const GLfloat *m 
>>> )
>>>  void
>>>  _math_matrix_ctr( GLmatrix *m )
>>>  {
>>> -   m->m = _mesa_align_malloc( 16 * sizeof(GLfloat), 16 );
>>> +   m->m = _mesa_align_malloc( 16 * 2 * sizeof(GLfloat), 16 );
>>> if (m->m)
>>> +   {
>>
>> Our style guide says to keep the curly bracket after an if on the same line.
>>
>>> +  m->inv = m->m + 16;
>>>memcpy( m->m, Identity, sizeof(Identity) );
>>> -   m->inv = _mesa_align_malloc( 16 * sizeof(GLfloat), 16 );
>>> -   if (m->inv)
>>>memcpy( m->inv, Identity, sizeof(Identity) );
>>> +   }
>>> +   else
>>> +   {
>>
>> } else {
>>
>> Although I see that this file defaults to;
>>
>> }
>> else {
>>
>> for some reason. Feel free to follow existing style, or adjust to my 
>> comments.
>> Also, if we want to do this change it deserves a comment in the source.
>>> +  m->inv = NULL;
>>> +   }
>>> m->type = MATRIX_IDENTITY;
>>> m->flags = 0;
>>>  }
>>> @@ -1493,7 +1497,6 @@ _math_matrix_dtr( GLmatrix *m )
>>> _mesa_align_free( m->m );
>>> m->m = NULL;
>>>
>>> -   _mesa_align_free( m->inv );
>>> m->inv = NULL;
>>>  }
>>>
>>> --
>>> 2.14.1
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/math: Allocate memory for GLmatrix elements and its inverse contiguously

2018-04-16 Thread Thomas Helland
Hi, and thanks for the patch =)

Have you done any performance testing on this to verify it
gives us a speedup of any kind? I'm asking because it seems like
this might be something that a decent compiler should be able to do.
Performance related patches, at least in core mesa, usually have
some justification with benchmark numbers in the commit message.
Some style comments below

2018-04-17 1:03 GMT+02:00 Vlad Golovkin :
> When GLmatrix elements and its inverse are stored contiguously in memory it 
> is possible to
> allocate, free and copy these fields with 1 function call instead of 2.
> ---
>  src/mesa/math/m_matrix.c | 15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c
> index 57a49533de..4ab78a1fb3 100644
> --- a/src/mesa/math/m_matrix.c
> +++ b/src/mesa/math/m_matrix.c
> @@ -1438,8 +1438,7 @@ _math_matrix_is_dirty( const GLmatrix *m )
>  void
>  _math_matrix_copy( GLmatrix *to, const GLmatrix *from )
>  {
> -   memcpy(to->m, from->m, 16 * sizeof(GLfloat));
> -   memcpy(to->inv, from->inv, 16 * sizeof(GLfloat));
> +   memcpy(to->m, from->m, 16 * 2 * sizeof(GLfloat));
> to->flags = from->flags;
> to->type = from->type;
>  }
> @@ -1470,12 +1469,17 @@ _math_matrix_loadf( GLmatrix *mat, const GLfloat *m )
>  void
>  _math_matrix_ctr( GLmatrix *m )
>  {
> -   m->m = _mesa_align_malloc( 16 * sizeof(GLfloat), 16 );
> +   m->m = _mesa_align_malloc( 16 * 2 * sizeof(GLfloat), 16 );
> if (m->m)
> +   {

Our style guide says to keep the curly bracket after an if on the same line.

> +  m->inv = m->m + 16;
>memcpy( m->m, Identity, sizeof(Identity) );
> -   m->inv = _mesa_align_malloc( 16 * sizeof(GLfloat), 16 );
> -   if (m->inv)
>memcpy( m->inv, Identity, sizeof(Identity) );
> +   }
> +   else
> +   {

} else {

Although I see that this file defaults to;

}
else {

for some reason. Feel free to follow existing style, or adjust to my comments.
Also, if we want to do this change it deserves a comment in the source.
> +  m->inv = NULL;
> +   }
> m->type = MATRIX_IDENTITY;
> m->flags = 0;
>  }
> @@ -1493,7 +1497,6 @@ _math_matrix_dtr( GLmatrix *m )
> _mesa_align_free( m->m );
> m->m = NULL;
>
> -   _mesa_align_free( m->inv );
> m->inv = NULL;
>  }
>
> --
> 2.14.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/18] [RFC] Pointer specific data structures

2018-04-13 Thread Thomas Helland
2018-04-12 20:07 GMT+02:00 Eric Anholt <e...@anholt.net>:
> Erik Faye-Lund <kusmab...@gmail.com> writes:
>
>> On Wed, Apr 11, 2018 at 8:48 PM, Thomas Helland
>> <thomashellan...@gmail.com> wrote:
>>> This series came about when I saw a talk online, while simultaneously
>>> being annoyed about the needless waste of memory in our set as reported
>>> by pahole. I have previously made some patches that changed our hash
>>> table from a reprobing one to a quadratic probing one, in the name of
>>> lower overhead and better cache locality, but I was not quite satisfied.
>>>
>>> I'm sending this series out now, as it seems like an ideal time since
>>> Timothy is working at reducing our compile times. Further details about
>>> the implementation and its advantages are described in the patches.
>>> I've found this to give a reduction in shader-db runtime of about 2%,
>>> but I have to do some more testing on my main computer, as my laptop
>>> is showing its age with some terrible thermal issues.
>>>
>>> This special cases on pointers, as that is a very common usecase.
>>> This allows us to drop some comparisons, and reduce the total size
>>> of our hash table to 70% of our current and the set to 50%. It uses
>>> linear probing and power-of-two table sizes to get good cache locality.
>>> In the pointer_map case it moves the stored hashes out into its own
>>> array for even better cache locality.
>>>
>>> I'm not sure if we want another set and map amongst our utils,
>>> but the patch series is simple enough, and complete enough,
>>> that I thought I could share it for some initial comments.
>>
>> This approach gives me a bad feeling. Using memory addresses for
>> storage ordering in a compiler is not quite nice; it can easily mask
>> spurious bugs, and have a compiler produce different result each run.
>> Such compilers are not nice to work with. I've seen *exactly* this
>> use-case go wrong in the past.
>
> I've got bad news for you about what we're already doing in
> _mesa_hash_pointer().
>
> I'm generally interested in this series, though a completely new
> implementation without unit tests is less interesting to me.  How much
> do we get from just having a pointer map forked off of hash_table.c with
> the ->hash removed?

Yeah, it would obviously need to be accompanied by a bunch of tests.
I'm not even sure that this is the best way. We might, as you suggest,
want to settle on something in between. I should have probably
mentioned that when I run shader-db under "perf stat" the number of
executed instructions is roughly the same, but stalled cycles is reduced.
This led me to the conclusion that the speedups are mostly
related to better cache locality and possibly also better speculative
code execution on the processor side. I see multiple alternatives;

- Fork of the existing hash table with hash removed
- Change the existing hash table to quadratic probing
  (I believe I tested this recently and concluded there were no gains)
- Change the existing hash table to storing the hash in a separate array
  (This should still give us some of the advantages)
- Separate array for hash + linear probing in existing hash table
- Some other combination of the above

I believe at least some of these alternatives will lead to having to
rewrite some uses of the hash table, as they assume things about
the implementation, although I can't quite remember. I can hack
together some other alternatives to see where that gets us.
Obviously, if we can modify the existing implementation and get
most of the benefit of the other changes that might be preferred,
as we would get all uses in one go, instead of having to migrate.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/18] util: Just cut the hash in the pointer table

2018-04-11 Thread Thomas Helland
Meant for testing. Defeats some of the benefits of the implementation,
however it still seems to be better than the current hash table,
and the complexity is undeniably very low.
---
 src/util/pointer_map.c | 99 +-
 src/util/pointer_map.h |  1 -
 2 files changed, 33 insertions(+), 67 deletions(-)

diff --git a/src/util/pointer_map.c b/src/util/pointer_map.c
index 463fa19282..7632218b91 100644
--- a/src/util/pointer_map.c
+++ b/src/util/pointer_map.c
@@ -39,28 +39,25 @@
 #include "ralloc.h"
 #include "macros.h"
 
-static inline uint8_t
-get_hash(uint8_t *metadata)
-{
-   return *metadata & 0x7F;
-}
+static const uint32_t deleted_key_value;
+static const void *deleted_key = _key_value;
 
-static inline void
-set_hash(uint8_t *metadata, uint32_t hash)
+static bool
+entry_is_free(const struct map_entry *entry)
 {
-   *metadata = (*metadata & ~0x7F) | (((uint8_t) hash) & 0x7F);
+   return entry->key == NULL;
 }
 
-static inline bool
-entry_is_free(uint8_t *metadata)
+static bool
+entry_is_deleted(const struct pointer_map *pm, struct map_entry *entry)
 {
-   return !(*metadata >> 7);
+   return entry->key == pm->deleted_key;
 }
 
-static inline void
-set_occupied(uint8_t *metadata, bool occupied)
+static bool
+entry_is_present(const struct pointer_map *pm, struct map_entry *entry)
 {
-   *metadata = occupied ? *metadata | 0x80 : *metadata & 0x7F;
+   return entry->key != NULL && entry->key != pm->deleted_key;
 }
 
 static inline uint32_t
@@ -70,15 +67,6 @@ hash_pointer(const void *pointer)
return (uint32_t) ((num >> 2) ^ (num >> 6) ^ (num >> 10) ^ (num >> 14));
 }
 
-static bool
-entry_is_deleted(struct pointer_map *map, uint8_t *metadata)
-{
-   if (get_hash(metadata) != 0)
-  return false;
-
-   return map->map[metadata - map->metadata].key == NULL;
-}
-
 struct pointer_map *
 _mesa_pointer_map_create(void *mem_ctx)
 {
@@ -91,9 +79,9 @@ _mesa_pointer_map_create(void *mem_ctx)
map->size = 1 << 4;
map->max_entries = map->size * 0.6;
map->map = rzalloc_array(map, struct map_entry, map->size);
-   map->metadata = rzalloc_array(map, uint8_t, map->size);
map->entries = 0;
map->deleted_entries = 0;
+   map->deleted_key = deleted_key;
 
if (map->map == NULL) {
   ralloc_free(map);
@@ -113,15 +101,13 @@ _mesa_pointer_map_clone(struct pointer_map *src, void 
*dst_mem_ctx)
memcpy(pm, src, sizeof(struct pointer_map));
 
pm->map = ralloc_array(pm, struct map_entry, pm->size);
-   pm->metadata = ralloc_array(pm, uint8_t, pm->size);
 
-   if (pm->map == NULL || pm->metadata == NULL) {
+   if (pm->map == NULL) {
   ralloc_free(pm);
   return NULL;
}
 
memcpy(pm->map, src->map, pm->size * sizeof(struct map_entry));
-   memcpy(pm->metadata, src->metadata, pm->size * sizeof(uint8_t));
 
return pm;
 }
@@ -154,7 +140,6 @@ _mesa_pointer_map_destroy(struct pointer_map *map,
 void
 _mesa_pointer_map_clear(struct pointer_map *map)
 {
-   memset(map->metadata, 0, map->size * sizeof(uint8_t));
memset(map->map, 0, sizeof(struct map_entry) * map->size);
map->entries = 0;
map->deleted_entries = 0;
@@ -173,15 +158,14 @@ _mesa_pointer_map_search(struct pointer_map *map, const 
void *key)
uint32_t start_hash_address = hash & (map->size - 1);
uint32_t hash_address = start_hash_address;
 
+   struct map_entry *entry = NULL;
do {
-  uint8_t *metadata = map->metadata + hash_address;
+  entry = map->map + hash_address;
 
-  if (entry_is_free(metadata)) {
+  if (entry_is_free(entry)) {
  return NULL;
-  } else if (get_hash(metadata) == (hash & 0x7F)) {
- if (map->map[hash_address].key == key) {
-return >map[hash_address];
- }
+  } else if (entry->key == key) {
+ return entry;
   }
 
   hash_address = (hash_address + 1) & (map->size - 1);
@@ -195,7 +179,6 @@ _mesa_pointer_map_rehash(struct pointer_map *map, unsigned 
new_size)
 {
struct pointer_map old_map;
struct map_entry *map_entries, *entry;
-   uint8_t *metadatas;
 
old_map = *map;
 
@@ -206,12 +189,7 @@ _mesa_pointer_map_rehash(struct pointer_map *map, unsigned 
new_size)
if (map_entries == NULL)
   return;
 
-   metadatas = rzalloc_array(map, uint8_t, map->size);
-   if (metadatas == NULL)
-  return;
-
map->map = map_entries;
-   map->metadata = metadatas;
map->entries = 0;
map->deleted_entries = 0;
 
@@ -220,7 +198,6 @@ _mesa_pointer_map_rehash(struct pointer_map *map, unsigned 
new_size)
}
 
ralloc_free(old_map.map);
-   ralloc_free(old_map.metadata);
 }
 
 /**
@@ -232,7 +209,7 @@ struct map_entry *
 _mesa_pointer_map_insert(struct pointer_map *map, const void *key, void *data)
 {
uint32_t start_hash_address, hash_address, hash;
-   uint8_t *available_entry = NULL;
+   struct map_entry *available_entry = NULL;
assert(key != NULL);
 
if (map->entries >= map->max_entries) {
@@ -245,16 +222,17 @@ _mesa_pointer_map_insert(struct 

[Mesa-dev] [PATCH 14/18] nir: Migrate lower_vars_to_ssa to use pointer set

2018-04-11 Thread Thomas Helland
---
 src/compiler/nir/nir_lower_vars_to_ssa.c | 35 
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c 
b/src/compiler/nir/nir_lower_vars_to_ssa.c
index 3dfe48d6d3..988936ece8 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -30,6 +30,7 @@
 #include "nir_phi_builder.h"
 #include "nir_vla.h"
 #include "util/pointer_map.h"
+#include "util/pointer_set.h"
 
 
 struct deref_node {
@@ -45,9 +46,9 @@ struct deref_node {
nir_deref_var *deref;
struct exec_node direct_derefs_link;
 
-   struct set *loads;
-   struct set *stores;
-   struct set *copies;
+   struct pointer_set *loads;
+   struct pointer_set *stores;
+   struct pointer_set *copies;
 
struct nir_phi_builder_value *pb_value;
 
@@ -367,10 +368,9 @@ register_load_instr(nir_intrinsic_instr *load_instr,
   return;
 
if (node->loads == NULL)
-  node->loads = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+  node->loads = _mesa_pointer_set_create(state->dead_ctx);
 
-   _mesa_set_add(node->loads, load_instr);
+   _mesa_pointer_set_insert(node->loads, load_instr);
 }
 
 static void
@@ -382,10 +382,9 @@ register_store_instr(nir_intrinsic_instr *store_instr,
   return;
 
if (node->stores == NULL)
-  node->stores = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+  node->stores = _mesa_pointer_set_create(state->dead_ctx);
 
-   _mesa_set_add(node->stores, store_instr);
+   _mesa_pointer_set_insert(node->stores, store_instr);
 }
 
 static void
@@ -400,10 +399,9 @@ register_copy_instr(nir_intrinsic_instr *copy_instr,
  continue;
 
   if (node->copies == NULL)
- node->copies = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+ node->copies = _mesa_pointer_set_create(state->dead_ctx);
 
-  _mesa_set_add(node->copies, copy_instr);
+  _mesa_pointer_set_insert(node->copies, copy_instr);
}
 }
 
@@ -449,8 +447,8 @@ lower_copies_to_load_store(struct deref_node *node,
if (!node->copies)
   return true;
 
-   struct set_entry *copy_entry;
-   set_foreach(node->copies, copy_entry) {
+   struct pointer_set_entry *copy_entry;
+   _mesa_pointer_set_foreach(node->copies, copy_entry) {
   nir_intrinsic_instr *copy = (void *)copy_entry->key;
 
   nir_lower_var_copy_instr(copy, state->shader);
@@ -463,9 +461,10 @@ lower_copies_to_load_store(struct deref_node *node,
  if (arg_node == NULL || arg_node == node)
 continue;
 
- struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, 
copy);
+ struct pointer_set_entry *arg_entry =
+   _mesa_pointer_set_search(arg_node->copies, copy);
  assert(arg_entry);
- _mesa_set_remove(node->copies, arg_entry);
+ _mesa_pointer_set_remove(node->copies, arg_entry);
   }
 
   nir_instr_remove(>instr);
@@ -713,8 +712,8 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
   assert(node->deref->var->constant_initializer == NULL);
 
   if (node->stores) {
- struct set_entry *store_entry;
- set_foreach(node->stores, store_entry) {
+ struct pointer_set_entry *store_entry;
+ _mesa_pointer_set_foreach(node->stores, store_entry) {
 nir_intrinsic_instr *store =
(nir_intrinsic_instr *)store_entry->key;
 BITSET_SET(store_blocks, store->instr.block->index);
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/18] glsl: Use the pointer map in the glsl linker

2018-04-11 Thread Thomas Helland
---
 src/compiler/glsl/linker.cpp | 40 +++-
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index af09b7d03e..c549cac4b5 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -75,6 +75,7 @@
 #include "program/program.h"
 #include "util/mesa-sha1.h"
 #include "util/set.h"
+#include "util/pointer_map.h"
 #include "string_to_uint_map.h"
 #include "linker.h"
 #include "link_varyings.h"
@@ -1315,11 +1316,11 @@ populate_symbol_table(gl_linked_shader *sh, 
glsl_symbol_table *symbols)
  */
 static void
 remap_variables(ir_instruction *inst, struct gl_linked_shader *target,
-hash_table *temps)
+pointer_map *temps)
 {
class remap_visitor : public ir_hierarchical_visitor {
public:
- remap_visitor(struct gl_linked_shader *target, hash_table *temps)
+ remap_visitor(struct gl_linked_shader *target, pointer_map *temps)
   {
  this->target = target;
  this->symbols = target->symbols;
@@ -1330,7 +1331,7 @@ remap_variables(ir_instruction *inst, struct 
gl_linked_shader *target,
   virtual ir_visitor_status visit(ir_dereference_variable *ir)
   {
  if (ir->var->data.mode == ir_var_temporary) {
-hash_entry *entry = _mesa_hash_table_search(temps, ir->var);
+map_entry *entry = _mesa_pointer_map_search(temps, ir->var);
 ir_variable *var = entry ? (ir_variable *) entry->data : NULL;
 
 assert(var != NULL);
@@ -1357,7 +1358,7 @@ remap_variables(ir_instruction *inst, struct 
gl_linked_shader *target,
   struct gl_linked_shader *target;
   glsl_symbol_table *symbols;
   exec_list *instructions;
-  hash_table *temps;
+  pointer_map *temps;
};
 
remap_visitor v(target, temps);
@@ -1391,11 +1392,10 @@ static exec_node *
 move_non_declarations(exec_list *instructions, exec_node *last,
   bool make_copies, gl_linked_shader *target)
 {
-   hash_table *temps = NULL;
+   pointer_map *temps = NULL;
 
if (make_copies)
-  temps = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-  _mesa_key_pointer_equal);
+  temps = _mesa_pointer_map_create(NULL);
 
foreach_in_list_safe(ir_instruction, inst, instructions) {
   if (inst->as_function())
@@ -1414,7 +1414,7 @@ move_non_declarations(exec_list *instructions, exec_node 
*last,
  inst = inst->clone(target, NULL);
 
  if (var != NULL)
-_mesa_hash_table_insert(temps, var, inst);
+_mesa_pointer_map_insert(temps, var, inst);
  else
 remap_variables(inst, target, temps);
   } else {
@@ -1426,7 +1426,7 @@ move_non_declarations(exec_list *instructions, exec_node 
*last,
}
 
if (make_copies)
-  _mesa_hash_table_destroy(temps, NULL);
+  _mesa_pointer_map_destroy(temps, NULL);
 
return last;
 }
@@ -1441,14 +1441,13 @@ class array_sizing_visitor : public deref_type_updater {
 public:
array_sizing_visitor()
   : mem_ctx(ralloc_context(NULL)),
-unnamed_interfaces(_mesa_hash_table_create(NULL, _mesa_hash_pointer,
-   _mesa_key_pointer_equal))
+unnamed_interfaces(_mesa_pointer_map_create(NULL))
{
}
 
~array_sizing_visitor()
{
-  _mesa_hash_table_destroy(this->unnamed_interfaces, NULL);
+  _mesa_pointer_map_destroy(this->unnamed_interfaces, NULL);
   ralloc_free(this->mem_ctx);
}
 
@@ -1483,17 +1482,17 @@ public:
  /* Store a pointer to the variable in the unnamed_interfaces
   * hashtable.
   */
- hash_entry *entry =
-   _mesa_hash_table_search(this->unnamed_interfaces,
-   ifc_type);
+ map_entry *entry =
+   _mesa_pointer_map_search(this->unnamed_interfaces,
+ifc_type);
 
  ir_variable **interface_vars = entry ? (ir_variable **) entry->data : 
NULL;
 
  if (interface_vars == NULL) {
 interface_vars = rzalloc_array(mem_ctx, ir_variable *,
ifc_type->length);
-_mesa_hash_table_insert(this->unnamed_interfaces, ifc_type,
-interface_vars);
+_mesa_pointer_map_insert(this->unnamed_interfaces, ifc_type,
+ interface_vars);
  }
  unsigned index = ifc_type->field_index(var->name);
  assert(index < ifc_type->length);
@@ -1511,8 +1510,8 @@ public:
 */
void fixup_unnamed_interface_types()
{
-  hash_table_call_foreach(this->unnamed_interfaces,
-  fixup_unnamed_interface_type, NULL);
+  _mesa_pointer_map_call_foreach(this->unnamed_interfaces,
+ 

[Mesa-dev] [PATCH 12/18] nir: Use pointer map in nir_from_ssa

2018-04-11 Thread Thomas Helland
---
 src/compiler/nir/nir_from_ssa.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c
index 1aa35509b1..e38c4fafd6 100644
--- a/src/compiler/nir/nir_from_ssa.c
+++ b/src/compiler/nir/nir_from_ssa.c
@@ -28,6 +28,7 @@
 #include "nir.h"
 #include "nir_builder.h"
 #include "nir_vla.h"
+#include "util/pointer_map.h"
 
 /*
  * This file implements an out-of-SSA pass as described in "Revisiting
@@ -39,7 +40,7 @@ struct from_ssa_state {
nir_builder builder;
void *dead_ctx;
bool phi_webs_only;
-   struct hash_table *merge_node_table;
+   struct pointer_map *merge_node_map;
nir_instr *instr;
bool progress;
 };
@@ -120,8 +121,8 @@ merge_set_dump(merge_set *set, FILE *fp)
 static merge_node *
 get_merge_node(nir_ssa_def *def, struct from_ssa_state *state)
 {
-   struct hash_entry *entry =
-  _mesa_hash_table_search(state->merge_node_table, def);
+   struct map_entry *entry =
+  _mesa_pointer_map_search(state->merge_node_map, def);
if (entry)
   return entry->data;
 
@@ -135,7 +136,7 @@ get_merge_node(nir_ssa_def *def, struct from_ssa_state 
*state)
node->def = def;
exec_list_push_head(>nodes, >node);
 
-   _mesa_hash_table_insert(state->merge_node_table, def, node);
+   _mesa_pointer_map_insert(state->merge_node_map, def, node);
 
return node;
 }
@@ -467,8 +468,8 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
struct from_ssa_state *state = void_state;
nir_register *reg;
 
-   struct hash_entry *entry =
-  _mesa_hash_table_search(state->merge_node_table, def);
+   struct map_entry *entry =
+  _mesa_pointer_map_search(state->merge_node_map, def);
if (entry) {
   /* In this case, we're part of a phi web.  Use the web's register. */
   merge_node *node = (merge_node *)entry->data;
@@ -765,8 +766,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool 
phi_webs_only)
nir_builder_init(, impl);
state.dead_ctx = ralloc_context(NULL);
state.phi_webs_only = phi_webs_only;
-   state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
+   state.merge_node_map = _mesa_pointer_map_create(NULL);
state.progress = false;
 
nir_foreach_block(block, impl) {
@@ -804,7 +804,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool 
phi_webs_only)
nir_metadata_dominance);
 
/* Clean up dead instructions and the hash tables */
-   _mesa_hash_table_destroy(state.merge_node_table, NULL);
+   _mesa_pointer_map_destroy(state.merge_node_map, NULL);
ralloc_free(state.dead_ctx);
return state.progress;
 }
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/18] glsl: Use pointer set in opt_copy_propagation

2018-04-11 Thread Thomas Helland
---
 src/compiler/glsl/opt_copy_propagation.cpp | 47 +-
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/src/compiler/glsl/opt_copy_propagation.cpp 
b/src/compiler/glsl/opt_copy_propagation.cpp
index 7bcd8a090b..0195dc4e40 100644
--- a/src/compiler/glsl/opt_copy_propagation.cpp
+++ b/src/compiler/glsl/opt_copy_propagation.cpp
@@ -38,8 +38,7 @@
 #include "ir_optimization.h"
 #include "compiler/glsl_types.h"
 #include "util/pointer_map.h"
-#include "util/hash_table.h"
-#include "util/set.h"
+#include "util/pointer_set.h"
 
 namespace {
 
@@ -51,8 +50,7 @@ public:
   mem_ctx = ralloc_context(0);
   lin_ctx = linear_alloc_parent(mem_ctx, 0);
   acp = _mesa_pointer_map_create(mem_ctx);
-  kills = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
-   _mesa_key_pointer_equal);
+  kills = _mesa_pointer_set_create(mem_ctx);
   killed_all = false;
}
~ir_copy_propagation_visitor()
@@ -79,7 +77,7 @@ public:
/**
 * Set of ir_variables: Whose values were killed in this block.
 */
-   set *kills;
+   pointer_set *kills;
 
bool progress;
 
@@ -99,18 +97,17 @@ 
ir_copy_propagation_visitor::visit_enter(ir_function_signature *ir)
 * main() at link time, so they're irrelevant to us.
 */
pointer_map *orig_acp = this->acp;
-   set *orig_kills = this->kills;
+   pointer_set *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
acp = _mesa_pointer_map_create(NULL);
-   kills = _mesa_set_create(NULL, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
+   kills = _mesa_pointer_set_create(NULL);
this->killed_all = false;
 
visit_list_elements(this, >body);
 
_mesa_pointer_map_destroy(acp, NULL);
-   _mesa_set_destroy(kills, NULL);
+   _mesa_pointer_set_destroy(kills, NULL);
 
this->kills = orig_kills;
this->acp = orig_acp;
@@ -209,11 +206,10 @@ void
 ir_copy_propagation_visitor::handle_if_block(exec_list *instructions)
 {
pointer_map *orig_acp = this->acp;
-   set *orig_kills = this->kills;
+   pointer_set *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
-   kills = _mesa_set_create(NULL, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
+   kills = _mesa_pointer_set_create(NULL);
this->killed_all = false;
 
/* Populate the initial acp with a copy of the original */
@@ -225,18 +221,18 @@ ir_copy_propagation_visitor::handle_if_block(exec_list 
*instructions)
   _mesa_pointer_map_clear(orig_acp);
}
 
-   set *new_kills = this->kills;
+   pointer_set *new_kills = this->kills;
this->kills = orig_kills;
_mesa_pointer_map_destroy(acp, NULL);
this->acp = orig_acp;
this->killed_all = this->killed_all || orig_killed_all;
 
-   struct set_entry *s_entry;
-   set_foreach(new_kills, s_entry) {
-  kill((ir_variable *) s_entry->key);
+   struct pointer_set_entry *pse;
+   _mesa_pointer_set_foreach(new_kills, pse) {
+  kill((ir_variable *) pse->key);
}
 
-   _mesa_set_destroy(new_kills, NULL);
+   _mesa_pointer_set_destroy(new_kills, NULL);
 }
 
 ir_visitor_status
@@ -255,11 +251,10 @@ void
 ir_copy_propagation_visitor::handle_loop(ir_loop *ir, bool keep_acp)
 {
pointer_map *orig_acp = this->acp;
-   set *orig_kills = this->kills;
+   pointer_set *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
-   kills = _mesa_set_create(NULL, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
+   kills = _mesa_pointer_set_create(NULL);
this->killed_all = false;
 
if (keep_acp) {
@@ -274,18 +269,18 @@ ir_copy_propagation_visitor::handle_loop(ir_loop *ir, 
bool keep_acp)
   _mesa_pointer_map_clear(orig_acp);
}
 
-   set *new_kills = this->kills;
+   pointer_set *new_kills = this->kills;
this->kills = orig_kills;
_mesa_pointer_map_destroy(acp, NULL);
this->acp = orig_acp;
this->killed_all = this->killed_all || orig_killed_all;
 
-   struct set_entry *entry;
-   set_foreach(new_kills, entry) {
-  kill((ir_variable *) entry->key);
+   struct pointer_set_entry *pse;
+   _mesa_pointer_set_foreach(new_kills, pse) {
+  kill((ir_variable *) pse->key);
}
 
-   _mesa_set_destroy(new_kills, NULL);
+   _mesa_pointer_set_destroy(new_kills, NULL);
 }
 
 ir_visitor_status
@@ -323,7 +318,7 @@ ir_copy_propagation_visitor::kill(ir_variable *var)
}
 
/* Add the LHS variable to the set of killed variables in this block. */
-   _mesa_set_add(kills, var);
+   _mesa_pointer_set_insert(kills, var);
 }
 
 /**
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/18] nir: Use pointer_set in nir_propagate_invariant

2018-04-11 Thread Thomas Helland
Should cut memory consumption approximately in half, while giving
us better cache locality and a simpler implementation.
---
 src/compiler/nir/nir_propagate_invariant.c | 33 +++---
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/compiler/nir/nir_propagate_invariant.c 
b/src/compiler/nir/nir_propagate_invariant.c
index 7b5bd6cce6..bc4c9f2465 100644
--- a/src/compiler/nir/nir_propagate_invariant.c
+++ b/src/compiler/nir/nir_propagate_invariant.c
@@ -22,14 +22,15 @@
  */
 
 #include "nir.h"
+#include "util/pointer_set.h"
 
 static void
-add_src(nir_src *src, struct set *invariants)
+add_src(nir_src *src, struct pointer_set *invariants)
 {
if (src->is_ssa) {
-  _mesa_set_add(invariants, src->ssa);
+  _mesa_pointer_set_insert(invariants, src->ssa);
} else {
-  _mesa_set_add(invariants, src->reg.reg);
+  _mesa_pointer_set_insert(invariants, src->reg.reg);
}
 }
 
@@ -41,17 +42,17 @@ add_src_cb(nir_src *src, void *state)
 }
 
 static bool
-dest_is_invariant(nir_dest *dest, struct set *invariants)
+dest_is_invariant(nir_dest *dest, struct pointer_set *invariants)
 {
if (dest->is_ssa) {
-  return _mesa_set_search(invariants, >ssa);
+  return _mesa_pointer_set_search(invariants, >ssa);
} else {
-  return _mesa_set_search(invariants, dest->reg.reg);
+  return _mesa_pointer_set_search(invariants, dest->reg.reg);
}
 }
 
 static void
-add_cf_node(nir_cf_node *cf, struct set *invariants)
+add_cf_node(nir_cf_node *cf, struct pointer_set *invariants)
 {
if (cf->type == nir_cf_node_if) {
   nir_if *if_stmt = nir_cf_node_as_if(cf);
@@ -63,19 +64,19 @@ add_cf_node(nir_cf_node *cf, struct set *invariants)
 }
 
 static void
-add_var(nir_variable *var, struct set *invariants)
+add_var(nir_variable *var, struct pointer_set *invariants)
 {
-   _mesa_set_add(invariants, var);
+   _mesa_pointer_set_insert(invariants, var);
 }
 
 static bool
-var_is_invariant(nir_variable *var, struct set * invariants)
+var_is_invariant(nir_variable *var, struct pointer_set *invariants)
 {
-   return var->data.invariant || _mesa_set_search(invariants, var);
+   return var->data.invariant || _mesa_pointer_set_search(invariants, var);
 }
 
 static void
-propagate_invariant_instr(nir_instr *instr, struct set *invariants)
+propagate_invariant_instr(nir_instr *instr, struct pointer_set *invariants)
 {
switch (instr->type) {
case nir_instr_type_alu: {
@@ -147,7 +148,8 @@ propagate_invariant_instr(nir_instr *instr, struct set 
*invariants)
 }
 
 static bool
-propagate_invariant_impl(nir_function_impl *impl, struct set *invariants)
+propagate_invariant_impl(nir_function_impl *impl,
+ struct pointer_set *invariants)
 {
bool progress = false;
 
@@ -181,8 +183,7 @@ bool
 nir_propagate_invariant(nir_shader *shader)
 {
/* Hash set of invariant things */
-   struct set *invariants = _mesa_set_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+   struct pointer_set *invariants = _mesa_pointer_set_create(NULL);
 
bool progress = false;
nir_foreach_function(function, shader) {
@@ -190,7 +191,7 @@ nir_propagate_invariant(nir_shader *shader)
  progress = true;
}
 
-   _mesa_set_destroy(invariants, NULL);
+   _mesa_pointer_set_destroy(invariants, NULL);
 
return progress;
 }
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/18] util: Add a pointer set implementation

2018-04-11 Thread Thomas Helland
This is a rework of our set for the common use case of storing pointers.
We are currently storing the hash, and comparing the hash of the key
to the hash that is stored for the entry, plus comparing the key itself.
Seeing as comparing a pointer is cheap, this means we are doubling the
size of our set to do more work, which seems unnecessary. This therefore
implements a special case for a pointer set. It uses a design where we
use power-of-two sized tables, meaning we can simply do bitmasking
instead of modulo when fitting the hash to our table. We use linear
probing to take advantage of the improved cache locality.
The goal is to improve cache locality and memory footprint, and at the
same time reduce the amount of work done and the complexity.

V2: Use bitmask in pointer set as size is always 2^n
---
 src/util/meson.build   |   2 +
 src/util/pointer_set.c | 266 +
 src/util/pointer_set.h |  90 +
 3 files changed, 358 insertions(+)
 create mode 100644 src/util/pointer_set.c
 create mode 100644 src/util/pointer_set.h

diff --git a/src/util/meson.build b/src/util/meson.build
index 9b50647f34..b6f9db5484 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -50,6 +50,8 @@ files_mesa_util = files(
   'os_time.h',
   'pointer_map.c',
   'pointer_map.h',
+  'pointer_set.c',
+  'pointer_set.h',
   'sha1/sha1.c',
   'sha1/sha1.h',
   'ralloc.c',
diff --git a/src/util/pointer_set.c b/src/util/pointer_set.c
new file mode 100644
index 00..8d8eff4541
--- /dev/null
+++ b/src/util/pointer_set.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * Implements a linear probing set specifically for pointer keys.
+ * It does not store the hash, effectively cutting the size of the set in two.
+ * Some of the spared space is used to reduce load factor to 50%. It uses
+ * linear probing for good cache locality.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "pointer_set.h"
+#include "ralloc.h"
+#include "macros.h"
+
+static const uint32_t deleted_key_value;
+static const void *deleted_key = &deleted_key_value;
+
+static inline bool
+entry_is_free(struct pointer_set_entry *entry)
+{
+   return entry->key == NULL;
+}
+
+static inline uint32_t
+hash_pointer(const void *pointer)
+{
+   uintptr_t num = (uintptr_t) pointer;
+   return (uint32_t) ((num >> 2) ^ (num >> 6) ^ (num >> 10) ^ (num >> 14));
+}
+
+static inline bool
+entry_is_deleted(struct pointer_set_entry *entry)
+{
+   return entry->key == deleted_key;
+}
+
+static inline bool
+entry_is_present(struct pointer_set_entry *entry)
+{
+   return entry->key != NULL && entry->key != deleted_key;
+}
+
+struct pointer_set *
+_mesa_pointer_set_create(void *mem_ctx)
+{
+   struct pointer_set *set;
+
+   set = ralloc(mem_ctx, struct pointer_set);
+   if (set == NULL)
+  return NULL;
+
+   set->size = 1 << 4;
+   set->max_entries = set->size / 2;
+   set->keys = rzalloc_array(set, struct pointer_set_entry, set->size);
+   set->entries = 0;
+   set->deleted_entries = 0;
+
+   if (set->keys == NULL) {
+  ralloc_free(set);
+  return NULL;
+   }
+
+   return set;
+}
+
+/**
+ * Frees the pointer set.
+ */
+void
+_mesa_pointer_set_destroy(struct pointer_set* set,
+  void (*delete_function)(struct pointer_set_entry *entry))
+{
+   if (!set)
+  return;
+
+   if (delete_function) {
+  struct pointer_set_entry *entry;
+
+  _mesa_pointer_set_foreach(set, entry) {
+ delete_function(entry);
+  }
+   }
+
+   ralloc_free(set);
+}
+
+/**
+ * Finds a set entry with the given key.
+ *
+ * Returns NULL if no entry is found.  Note that the data pointer may be
+ * modified by the user.
+ */
+struct pointer_set_entry 

[Mesa-dev] [PATCH 06/18] nir: Change lower_vars_to_ssa to use pointer map

2018-04-11 Thread Thomas Helland
---
 src/compiler/nir/nir_lower_vars_to_ssa.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c 
b/src/compiler/nir/nir_lower_vars_to_ssa.c
index e8cfe308d2..3dfe48d6d3 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -29,6 +29,7 @@
 #include "nir_builder.h"
 #include "nir_phi_builder.h"
 #include "nir_vla.h"
+#include "util/pointer_map.h"
 
 
 struct deref_node {
@@ -61,7 +62,7 @@ struct lower_variables_state {
nir_function_impl *impl;
 
/* A hash table mapping variables to deref_node data */
-   struct hash_table *deref_var_nodes;
+   struct pointer_map *deref_var_nodes;
 
/* A hash table mapping fully-qualified direct dereferences, i.e.
 * dereferences with no indirect or wildcard array dereferences, to
@@ -114,14 +115,14 @@ get_deref_node_for_var(nir_variable *var, struct 
lower_variables_state *state)
 {
struct deref_node *node;
 
-   struct hash_entry *var_entry =
-  _mesa_hash_table_search(state->deref_var_nodes, var);
+   struct map_entry *var_entry =
+  _mesa_pointer_map_search(state->deref_var_nodes, var);
 
if (var_entry) {
   return var_entry->data;
} else {
   node = deref_node_create(NULL, var->type, state->dead_ctx);
-  _mesa_hash_table_insert(state->deref_var_nodes, var, node);
+  _mesa_pointer_map_insert(state->deref_var_nodes, var, node);
   return node;
}
 }
@@ -646,9 +647,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
state.dead_ctx = ralloc_context(state.shader);
state.impl = impl;
 
-   state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx,
-   _mesa_hash_pointer,
-   _mesa_key_pointer_equal);
+   state.deref_var_nodes = _mesa_pointer_map_create(state.dead_ctx);
exec_list_make_empty(_deref_nodes);
 
/* Build the initial deref structures and direct_deref_nodes table */
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/18] glsl: Change glsl_to_nir to use pointer map

2018-04-11 Thread Thomas Helland
---
 src/compiler/glsl/glsl_to_nir.cpp | 31 +++
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 80eb15f1ab..310b678680 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -32,6 +32,7 @@
 #include "compiler/nir/nir_control_flow.h"
 #include "compiler/nir/nir_builder.h"
 #include "main/imports.h"
+#include "util/pointer_map.h"
 
 /*
  * pass to lower GLSL IR to NIR
@@ -103,10 +104,10 @@ private:
bool is_global;
 
/* map of ir_variable -> nir_variable */
-   struct hash_table *var_table;
+   struct pointer_map *var_map;
 
/* map of ir_function_signature -> nir_function_overload */
-   struct hash_table *overload_table;
+   struct pointer_map *overload_map;
 };
 
 /*
@@ -191,10 +192,8 @@ nir_visitor::nir_visitor(nir_shader *shader)
this->supports_ints = shader->options->native_integers;
this->shader = shader;
this->is_global = true;
-   this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
-   this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-  _mesa_key_pointer_equal);
+   this->var_map = _mesa_pointer_map_create(NULL);
+   this->overload_map = _mesa_pointer_map_create(NULL);
this->result = NULL;
this->impl = NULL;
this->var = NULL;
@@ -205,8 +204,8 @@ nir_visitor::nir_visitor(nir_shader *shader)
 
 nir_visitor::~nir_visitor()
 {
-   _mesa_hash_table_destroy(this->var_table, NULL);
-   _mesa_hash_table_destroy(this->overload_table, NULL);
+   _mesa_pointer_map_destroy(this->var_map, NULL);
+   _mesa_pointer_map_destroy(this->overload_map, NULL);
 }
 
 nir_deref_var *
@@ -467,7 +466,7 @@ nir_visitor::visit(ir_variable *ir)
else
   nir_shader_add_variable(shader, var);
 
-   _mesa_hash_table_insert(var_table, ir, var);
+   _mesa_pointer_map_insert(var_map, ir, var);
this->var = var;
 }
 
@@ -491,7 +490,7 @@ nir_visitor::create_function(ir_function_signature *ir)
assert(ir->parameters.is_empty());
assert(ir->return_type == glsl_type::void_type);
 
-   _mesa_hash_table_insert(this->overload_table, ir, func);
+   _mesa_pointer_map_insert(this->overload_map, ir, func);
 }
 
 void
@@ -507,8 +506,8 @@ nir_visitor::visit(ir_function_signature *ir)
if (ir->is_intrinsic())
   return;
 
-   struct hash_entry *entry =
-  _mesa_hash_table_search(this->overload_table, ir);
+   struct map_entry *entry =
+  _mesa_pointer_map_search(this->overload_map, ir);
 
assert(entry);
nir_function *func = (nir_function *) entry->data;
@@ -1231,8 +1230,8 @@ nir_visitor::visit(ir_call *ir)
   return;
}
 
-   struct hash_entry *entry =
-  _mesa_hash_table_search(this->overload_table, ir->callee);
+   struct map_entry *entry =
+  _mesa_pointer_map_search(this->overload_map, ir->callee);
assert(entry);
nir_function *callee = (nir_function *) entry->data;
 
@@ -2174,8 +2173,8 @@ nir_visitor::visit(ir_constant *ir)
 void
 nir_visitor::visit(ir_dereference_variable *ir)
 {
-   struct hash_entry *entry =
-  _mesa_hash_table_search(this->var_table, ir->var);
+   struct map_entry *entry =
+  _mesa_pointer_map_search(this->var_map, ir->var);
assert(entry);
nir_variable *var = (nir_variable *) entry->data;
 
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/18] nir: Use pointer set in remove_dead_variable

2018-04-11 Thread Thomas Helland
This should simplify things, and cut the memory consumption of the
set effectively in half. Cache locality should also be better.
---
 src/compiler/nir/nir_remove_dead_variables.c | 37 ++--
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_remove_dead_variables.c 
b/src/compiler/nir/nir_remove_dead_variables.c
index eff66f92d4..ff78fc6c90 100644
--- a/src/compiler/nir/nir_remove_dead_variables.c
+++ b/src/compiler/nir/nir_remove_dead_variables.c
@@ -26,16 +26,17 @@
  */
 
 #include "nir.h"
+#include "util/pointer_set.h"
 
 static void
-add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live,
+add_var_use_intrinsic(nir_intrinsic_instr *instr, struct pointer_set *live,
   nir_variable_mode modes)
 {
unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
 
switch (instr->intrinsic) {
case nir_intrinsic_copy_var:
-  _mesa_set_add(live, instr->variables[1]->var);
+  _mesa_pointer_set_insert(live, instr->variables[1]->var);
   /* Fall through */
case nir_intrinsic_store_var: {
   /* The first source in both copy_var and store_var is the destination.
@@ -44,7 +45,7 @@ add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set 
*live,
*/
   nir_variable_mode mode = instr->variables[0]->var->data.mode;
   if (!(mode & (nir_var_local | nir_var_global | nir_var_shared)))
- _mesa_set_add(live, instr->variables[0]->var);
+ _mesa_pointer_set_insert(live, instr->variables[0]->var);
   break;
}
 
@@ -58,42 +59,42 @@ add_var_use_intrinsic(nir_intrinsic_instr *instr, struct 
set *live,
 
default:
   for (unsigned i = 0; i < num_vars; i++) {
- _mesa_set_add(live, instr->variables[i]->var);
+ _mesa_pointer_set_insert(live, instr->variables[i]->var);
   }
   break;
}
 }
 
 static void
-add_var_use_call(nir_call_instr *instr, struct set *live)
+add_var_use_call(nir_call_instr *instr, struct pointer_set *live)
 {
if (instr->return_deref != NULL) {
   nir_variable *var = instr->return_deref->var;
-  _mesa_set_add(live, var);
+  _mesa_pointer_set_insert(live, var);
}
 
for (unsigned i = 0; i < instr->num_params; i++) {
   nir_variable *var = instr->params[i]->var;
-  _mesa_set_add(live, var);
+  _mesa_pointer_set_insert(live, var);
}
 }
 
 static void
-add_var_use_tex(nir_tex_instr *instr, struct set *live)
+add_var_use_tex(nir_tex_instr *instr, struct pointer_set *live)
 {
if (instr->texture != NULL) {
   nir_variable *var = instr->texture->var;
-  _mesa_set_add(live, var);
+  _mesa_pointer_set_insert(live, var);
}
 
if (instr->sampler != NULL) {
   nir_variable *var = instr->sampler->var;
-  _mesa_set_add(live, var);
+  _mesa_pointer_set_insert(live, var);
}
 }
 
 static void
-add_var_use_shader(nir_shader *shader, struct set *live, nir_variable_mode 
modes)
+add_var_use_shader(nir_shader *shader, struct pointer_set *live, 
nir_variable_mode modes)
 {
nir_foreach_function(function, shader) {
   if (function->impl) {
@@ -123,7 +124,7 @@ add_var_use_shader(nir_shader *shader, struct set *live, 
nir_variable_mode modes
 }
 
 static void
-remove_dead_var_writes(nir_shader *shader, struct set *live)
+remove_dead_var_writes(nir_shader *shader)
 {
nir_foreach_function(function, shader) {
   if (!function->impl)
@@ -148,12 +149,12 @@ remove_dead_var_writes(nir_shader *shader, struct set 
*live)
 }
 
 static bool
-remove_dead_vars(struct exec_list *var_list, struct set *live)
+remove_dead_vars(struct exec_list *var_list, struct pointer_set *live)
 {
bool progress = false;
 
foreach_list_typed_safe(nir_variable, var, node, var_list) {
-  struct set_entry *entry = _mesa_set_search(live, var);
+  struct pointer_set_entry *entry = _mesa_pointer_set_search(live, var);
   if (entry == NULL) {
  /* Mark this variable as used by setting the mode to 0 */
  var->data.mode = 0;
@@ -169,8 +170,8 @@ bool
 nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes)
 {
bool progress = false;
-   struct set *live =
-  _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+   struct pointer_set *live =
+  _mesa_pointer_set_create(NULL);
 
add_var_use_shader(shader, live, modes);
 
@@ -202,7 +203,7 @@ nir_remove_dead_variables(nir_shader *shader, 
nir_variable_mode modes)
}
 
if (progress) {
-  remove_dead_var_writes(shader, live);
+  remove_dead_var_writes(shader);
 
   nir_foreach_function(function, shader) {
  if (function->impl) {
@@ -212,6 +213,6 @@ nir_remove_dead_variables(nir_shader *shader, 
nir_variable_mode modes)
   }
}
 
-   _mesa_set_destroy(live, NULL);
+   _mesa_pointer_set_destroy(live, NULL);
return progress;
 }
-- 
2.16.2

___
mesa-dev mailing list

[Mesa-dev] [PATCH 10/18] util: Add a call_foreach function to the pointer map

2018-04-11 Thread Thomas Helland
---
 src/util/pointer_map.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/util/pointer_map.h b/src/util/pointer_map.h
index 4bfc306a5f..f92e67d40d 100644
--- a/src/util/pointer_map.h
+++ b/src/util/pointer_map.h
@@ -91,6 +91,19 @@ _mesa_pointer_map_next_entry(struct pointer_map *map,
 entry != NULL;  \
 entry = _mesa_pointer_map_next_entry(map, entry))
 
+static inline void
+_mesa_pointer_map_call_foreach(struct pointer_map *pm,
+   void (*callback)(const void *key,
+void *data,
+void *closure),
+   void *closure)
+{
+   struct map_entry *entry;
+
+   _mesa_pointer_map_foreach(pm, entry)
+  callback(entry->key, entry->data, closure);
+}
+
 #ifdef __cplusplus
 } /* extern C */
 #endif
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/18] glsl: Use pointer map in copy propagation

2018-04-11 Thread Thomas Helland
---
 src/compiler/glsl/opt_copy_propagation.cpp | 48 ++
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/src/compiler/glsl/opt_copy_propagation.cpp 
b/src/compiler/glsl/opt_copy_propagation.cpp
index 6220aa86da..7bcd8a090b 100644
--- a/src/compiler/glsl/opt_copy_propagation.cpp
+++ b/src/compiler/glsl/opt_copy_propagation.cpp
@@ -37,6 +37,7 @@
 #include "ir_basic_block.h"
 #include "ir_optimization.h"
 #include "compiler/glsl_types.h"
+#include "util/pointer_map.h"
 #include "util/hash_table.h"
 #include "util/set.h"
 
@@ -49,8 +50,7 @@ public:
   progress = false;
   mem_ctx = ralloc_context(0);
   lin_ctx = linear_alloc_parent(mem_ctx, 0);
-  acp = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
+  acp = _mesa_pointer_map_create(mem_ctx);
   kills = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
_mesa_key_pointer_equal);
   killed_all = false;
@@ -73,8 +73,8 @@ public:
void kill(ir_variable *ir);
void handle_if_block(exec_list *instructions);
 
-   /** Hash of lhs->rhs: The available copies to propagate */
-   hash_table *acp;
+   /** Map of lhs->rhs: The available copies to propagate */
+   pointer_map *acp;
 
/**
 * Set of ir_variables: Whose values were killed in this block.
@@ -98,19 +98,18 @@ 
ir_copy_propagation_visitor::visit_enter(ir_function_signature *ir)
 * block.  Any instructions at global scope will be shuffled into
 * main() at link time, so they're irrelevant to us.
 */
-   hash_table *orig_acp = this->acp;
+   pointer_map *orig_acp = this->acp;
set *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
-   acp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+   acp = _mesa_pointer_map_create(NULL);
kills = _mesa_set_create(NULL, _mesa_hash_pointer,
 _mesa_key_pointer_equal);
this->killed_all = false;
 
visit_list_elements(this, >body);
 
-   _mesa_hash_table_destroy(acp, NULL);
+   _mesa_pointer_map_destroy(acp, NULL);
_mesa_set_destroy(kills, NULL);
 
this->kills = orig_kills;
@@ -150,7 +149,7 @@ ir_copy_propagation_visitor::visit(ir_dereference_variable 
*ir)
if (this->in_assignee)
   return visit_continue;
 
-   struct hash_entry *entry = _mesa_hash_table_search(acp, ir->var);
+   struct map_entry *entry = _mesa_pointer_map_search(acp, ir->var);
if (entry) {
   ir->var = (ir_variable *) entry->data;
   progress = true;
@@ -185,7 +184,7 @@ ir_copy_propagation_visitor::visit_enter(ir_call *ir)
 * and out parameters).
 */
if (!ir->callee->is_intrinsic()) {
-  _mesa_hash_table_clear(acp, NULL);
+  _mesa_pointer_map_clear(acp);
   this->killed_all = true;
} else {
   if (ir->return_deref)
@@ -209,7 +208,7 @@ ir_copy_propagation_visitor::visit_enter(ir_call *ir)
 void
 ir_copy_propagation_visitor::handle_if_block(exec_list *instructions)
 {
-   hash_table *orig_acp = this->acp;
+   pointer_map *orig_acp = this->acp;
set *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
@@ -218,17 +217,17 @@ ir_copy_propagation_visitor::handle_if_block(exec_list 
*instructions)
this->killed_all = false;
 
/* Populate the initial acp with a copy of the original */
-   acp = _mesa_hash_table_clone(orig_acp, NULL);
+   acp = _mesa_pointer_map_clone(orig_acp, NULL);
 
visit_list_elements(this, instructions);
 
if (this->killed_all) {
-  _mesa_hash_table_clear(orig_acp, NULL);
+  _mesa_pointer_map_clear(orig_acp);
}
 
set *new_kills = this->kills;
this->kills = orig_kills;
-   _mesa_hash_table_destroy(acp, NULL);
+   _mesa_pointer_map_destroy(acp, NULL);
this->acp = orig_acp;
this->killed_all = this->killed_all || orig_killed_all;
 
@@ -255,7 +254,7 @@ ir_copy_propagation_visitor::visit_enter(ir_if *ir)
 void
 ir_copy_propagation_visitor::handle_loop(ir_loop *ir, bool keep_acp)
 {
-   hash_table *orig_acp = this->acp;
+   pointer_map *orig_acp = this->acp;
set *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
@@ -264,21 +263,20 @@ ir_copy_propagation_visitor::handle_loop(ir_loop *ir, 
bool keep_acp)
this->killed_all = false;
 
if (keep_acp) {
-  acp = _mesa_hash_table_clone(orig_acp, NULL);
+  acp = _mesa_pointer_map_clone(orig_acp, NULL);
} else {
-  acp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
+  acp = _mesa_pointer_map_create(NULL);
}
 
visit_list_elements(this, >body_instructions);
 
if (this->killed_all) {
-  _mesa_hash_table_clear(orig_acp, NULL);
+  _mesa_pointer_map_clear(orig_acp);
}
 
set *new_kills = this->kills;
this->kills = orig_kills;
-   _mesa_hash_table_destroy(acp, NULL);
+   

[Mesa-dev] [PATCH 08/18] glsl: Use pointer map in opt_constant_variable

2018-04-11 Thread Thomas Helland
---
 src/compiler/glsl/opt_constant_variable.cpp | 34 ++---
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/compiler/glsl/opt_constant_variable.cpp 
b/src/compiler/glsl/opt_constant_variable.cpp
index 914b46004c..d1d315af7a 100644
--- a/src/compiler/glsl/opt_constant_variable.cpp
+++ b/src/compiler/glsl/opt_constant_variable.cpp
@@ -37,6 +37,7 @@
 #include "ir_optimization.h"
 #include "compiler/glsl_types.h"
 #include "util/hash_table.h"
+#include "util/pointer_map.h"
 
 namespace {
 
@@ -54,23 +55,23 @@ public:
virtual ir_visitor_status visit_enter(ir_assignment *);
virtual ir_visitor_status visit_enter(ir_call *);
 
-   struct hash_table *ht;
+   struct pointer_map *map;
 };
 
 } /* unnamed namespace */
 
 static struct assignment_entry *
-get_assignment_entry(ir_variable *var, struct hash_table *ht)
+get_assignment_entry(ir_variable *var, struct pointer_map *map)
 {
-   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   struct map_entry *me = _mesa_pointer_map_search(map, var);
struct assignment_entry *entry;
 
-   if (hte) {
-  entry = (struct assignment_entry *) hte->data;
+   if (me) {
+  entry = (struct assignment_entry *) me->data;
} else {
   entry = (struct assignment_entry *) calloc(1, sizeof(*entry));
   entry->var = var;
-  _mesa_hash_table_insert(ht, var, entry);
+  _mesa_pointer_map_insert(map, var, entry);
}
 
return entry;
@@ -79,7 +80,7 @@ get_assignment_entry(ir_variable *var, struct hash_table *ht)
 ir_visitor_status
 ir_constant_variable_visitor::visit(ir_variable *ir)
 {
-   struct assignment_entry *entry = get_assignment_entry(ir, this->ht);
+   struct assignment_entry *entry = get_assignment_entry(ir, this->map);
entry->our_scope = true;
return visit_continue;
 }
@@ -98,7 +99,7 @@ ir_constant_variable_visitor::visit_enter(ir_assignment *ir)
ir_constant *constval;
struct assignment_entry *entry;
 
-   entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht);
+   entry = get_assignment_entry(ir->lhs->variable_referenced(), this->map);
assert(entry);
entry->assignment_count++;
 
@@ -159,7 +160,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
 struct assignment_entry *entry;
 
 assert(var);
-entry = get_assignment_entry(var, this->ht);
+entry = get_assignment_entry(var, this->map);
 entry->assignment_count++;
   }
}
@@ -170,7 +171,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
   struct assignment_entry *entry;
 
   assert(var);
-  entry = get_assignment_entry(var, this->ht);
+  entry = get_assignment_entry(var, this->map);
   entry->assignment_count++;
}
 
@@ -186,22 +187,21 @@ do_constant_variable(exec_list *instructions)
bool progress = false;
ir_constant_variable_visitor v;
 
-   v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-  _mesa_key_pointer_equal);
+   v.map = _mesa_pointer_map_create(NULL);
v.run(instructions);
 
-   struct hash_entry *hte;
-   hash_table_foreach(v.ht, hte) {
-  struct assignment_entry *entry = (struct assignment_entry *) hte->data;
+   struct map_entry *me;
+   _mesa_pointer_map_foreach(v.map, me) {
+  struct assignment_entry *entry = (struct assignment_entry *) me->data;
 
   if (entry->assignment_count == 1 && entry->constval && entry->our_scope) 
{
 entry->var->constant_value = entry->constval;
 progress = true;
   }
-  hte->data = NULL;
+  me->data = NULL;
   free(entry);
}
-   _mesa_hash_table_destroy(v.ht, NULL);
+   _mesa_pointer_map_destroy(v.map, NULL);
 
return progress;
 }
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/18] util: Add a pointer map clone function

2018-04-11 Thread Thomas Helland
---
 src/util/pointer_map.c | 23 +++
 src/util/pointer_map.h |  3 +++
 2 files changed, 26 insertions(+)

diff --git a/src/util/pointer_map.c b/src/util/pointer_map.c
index 8076bd827f..463fa19282 100644
--- a/src/util/pointer_map.c
+++ b/src/util/pointer_map.c
@@ -102,6 +102,29 @@ _mesa_pointer_map_create(void *mem_ctx)
 
return map;
 }
+struct pointer_map *
+_mesa_pointer_map_clone(struct pointer_map *src, void *dst_mem_ctx)
+{
+   struct pointer_map *pm = ralloc(dst_mem_ctx, struct pointer_map);
+
+   if (pm == NULL)
+  return NULL;
+
+   memcpy(pm, src, sizeof(struct pointer_map));
+
+   pm->map = ralloc_array(pm, struct map_entry, pm->size);
+   pm->metadata = ralloc_array(pm, uint8_t, pm->size);
+
+   if (pm->map == NULL || pm->metadata == NULL) {
+  ralloc_free(pm);
+  return NULL;
+   }
+
+   memcpy(pm->map, src->map, pm->size * sizeof(struct map_entry));
+   memcpy(pm->metadata, src->metadata, pm->size * sizeof(uint8_t));
+
+   return pm;
+}
 
 /**
  * Frees the pointer map.
diff --git a/src/util/pointer_map.h b/src/util/pointer_map.h
index e1cef418d8..4bfc306a5f 100644
--- a/src/util/pointer_map.h
+++ b/src/util/pointer_map.h
@@ -55,6 +55,9 @@ struct pointer_map {
 struct pointer_map *
 _mesa_pointer_map_create(void *mem_ctx);
 
+struct pointer_map *
+_mesa_pointer_map_clone(struct pointer_map *, void *dst_mem_ctx);
+
 void _mesa_pointer_map_destroy(struct pointer_map *map,
void (*delete_function)(struct map_entry 
*entry));
 
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/18] util: Add initial pointer map implementation

2018-04-11 Thread Thomas Helland
The motivation is that for the common case of pointers as keys the
current hash table implementation has multiple disadvantages.
It stores the hash, which means we get more memory usage than
is strictly necessary. It also compares both the hash, and the
pointer against the key when searching, when simply comparing
the pointer is enough and just as cheap. Also, it has a very
cache unfriendly reprobing algorithm.

This implementation addresses all of these issues, plus more.
It uses a table of size 2^n, meaning we can simply mask off bits
instead of computing an expensive modulo when inserting or searching
the table for entries. It also uses linear probing for cache locality.
It also has the nice effect that the CPU should be more likely to be
able to do speculative execution. To further improve cache locality
it takes a trick from the talk "Designing a Fast, Efficient,
Cache-friendly Hash Table, Step by Step" from the 2017 CppCon
held by Matt Kulukundis; it stores the metadata separate from the
stored data. The way this is done is that it allocates one byte
per entry, uses 7 bits to store the lower bits of the hash,
and uses the last bit to indicate if the slot is empty. The net
result is a space saving of 7/24ths, along with a much improved
cache friendliness. This can be further improved by using SSE
instructions for processing a large number of entries at a time,
but I found that to be too platform-specific, so I left it out.
One can argue if the cache penalty of storing the hash in a
separate array, and having to swap cache lines to acquire the
key is as much a penalty as the gain from reduced memory usage.
I should probably swap this implementation for one that just
removes the storage of the hash, and see how that fares. That
would be similar to what is done for the set later in this series.

V2:  Use bitmask instead of modulo as map is always size 2^n

V3: Use some of the saved space to lower the load factor in map

This will reduce the length of clusters, effectively giving us shorter
probing lengths both on insertion and search. This should not affect
cache locality, as the only potential change would be that we find a
free slot more often, and that means we're done with the loop. The only
case where this hurts us is when iterating the hash table, as we will
need to iterate over more entries.
---
 src/util/meson.build   |   2 +
 src/util/pointer_map.c | 323 +
 src/util/pointer_map.h |  95 +++
 3 files changed, 420 insertions(+)
 create mode 100644 src/util/pointer_map.c
 create mode 100644 src/util/pointer_map.h

diff --git a/src/util/meson.build b/src/util/meson.build
index eece1cefef..9b50647f34 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -48,6 +48,8 @@ files_mesa_util = files(
   'mesa-sha1.h',
   'os_time.c',
   'os_time.h',
+  'pointer_map.c',
+  'pointer_map.h',
   'sha1/sha1.c',
   'sha1/sha1.h',
   'ralloc.c',
diff --git a/src/util/pointer_map.c b/src/util/pointer_map.c
new file mode 100644
index 00..8076bd827f
--- /dev/null
+++ b/src/util/pointer_map.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * Implements a linear probing hash table specifically for pointer keys.
+ * It uses a separate metadata array for good cache locality when searching.
+ * The metadata array is an array of bytes, where the seven LSB stores a hash,
+ * and the first bit stores whether the entry is free. An important detail is
+ * that the bit being
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "pointer_map.h"
+#include "ralloc.h"
+#include "macros.h"
+
+static inline uint8_t
+get_hash(uint8_t *metadata)
+{
+   return *metadata & 0x7F;
+}
+
+static inline void
+set_hash(uint8_t *metadata, uint32_t

[Mesa-dev] [PATCH 05/18] glsl: Move ir_variable_refcount to using the pointer map

2018-04-11 Thread Thomas Helland
---
 src/compiler/glsl/ir_variable_refcount.cpp | 13 ++---
 src/compiler/glsl/ir_variable_refcount.h   |  4 ++--
 src/compiler/glsl/opt_dead_code.cpp|  6 +++---
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/compiler/glsl/ir_variable_refcount.cpp 
b/src/compiler/glsl/ir_variable_refcount.cpp
index 8306be10b9..c5bef9efbf 100644
--- a/src/compiler/glsl/ir_variable_refcount.cpp
+++ b/src/compiler/glsl/ir_variable_refcount.cpp
@@ -33,17 +33,16 @@
 #include "ir_visitor.h"
 #include "ir_variable_refcount.h"
 #include "compiler/glsl_types.h"
-#include "util/hash_table.h"
+#include "util/pointer_map.h"
 
 ir_variable_refcount_visitor::ir_variable_refcount_visitor()
 {
this->mem_ctx = ralloc_context(NULL);
-   this->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-  _mesa_key_pointer_equal);
+   this->pm = _mesa_pointer_map_create(NULL);
 }
 
 static void
-free_entry(struct hash_entry *entry)
+free_entry(struct map_entry *entry)
 {
ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) 
entry->data;
 
@@ -61,7 +60,7 @@ free_entry(struct hash_entry *entry)
 ir_variable_refcount_visitor::~ir_variable_refcount_visitor()
 {
ralloc_free(this->mem_ctx);
-   _mesa_hash_table_destroy(this->ht, free_entry);
+   _mesa_pointer_map_destroy(this->pm, free_entry);
 }
 
 // constructor
@@ -79,13 +78,13 @@ 
ir_variable_refcount_visitor::get_variable_entry(ir_variable *var)
 {
assert(var);
 
-   struct hash_entry *e = _mesa_hash_table_search(this->ht, var);
+   struct map_entry *e = _mesa_pointer_map_search(this->pm, var);
if (e)
   return (ir_variable_refcount_entry *)e->data;
 
ir_variable_refcount_entry *entry = new ir_variable_refcount_entry(var);
assert(entry->referenced_count == 0);
-   _mesa_hash_table_insert(this->ht, var, entry);
+   _mesa_pointer_map_insert(this->pm, var, entry);
 
return entry;
 }
diff --git a/src/compiler/glsl/ir_variable_refcount.h 
b/src/compiler/glsl/ir_variable_refcount.h
index 4a90f08c91..270bef7ecd 100644
--- a/src/compiler/glsl/ir_variable_refcount.h
+++ b/src/compiler/glsl/ir_variable_refcount.h
@@ -81,9 +81,9 @@ public:
ir_variable_refcount_entry *get_variable_entry(ir_variable *var);
 
/**
-* Hash table mapping ir_variable to ir_variable_refcount_entry.
+* Pointer map mapping ir_variable to ir_variable_refcount_entry.
 */
-   struct hash_table *ht;
+   struct pointer_map *pm;
 
void *mem_ctx;
 };
diff --git a/src/compiler/glsl/opt_dead_code.cpp 
b/src/compiler/glsl/opt_dead_code.cpp
index 75e668ae46..78247d7f4c 100644
--- a/src/compiler/glsl/opt_dead_code.cpp
+++ b/src/compiler/glsl/opt_dead_code.cpp
@@ -31,7 +31,7 @@
 #include "ir_visitor.h"
 #include "ir_variable_refcount.h"
 #include "compiler/glsl_types.h"
-#include "util/hash_table.h"
+#include "util/pointer_map.h"
 
 static bool debug = false;
 
@@ -50,8 +50,8 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
 
v.run(instructions);
 
-   struct hash_entry *e;
-   hash_table_foreach(v.ht, e) {
+   struct map_entry *e;
+   _mesa_pointer_map_foreach(v.pm, e) {
   ir_variable_refcount_entry *entry = (ir_variable_refcount_entry 
*)e->data;
 
   /* Since each assignment is a reference, the refereneced count must be
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/18] glsl: Use pointer map in constant propagation

2018-04-11 Thread Thomas Helland
---
 src/compiler/glsl/opt_constant_propagation.cpp | 47 --
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/src/compiler/glsl/opt_constant_propagation.cpp 
b/src/compiler/glsl/opt_constant_propagation.cpp
index 05dc71efb7..8072bf4811 100644
--- a/src/compiler/glsl/opt_constant_propagation.cpp
+++ b/src/compiler/glsl/opt_constant_propagation.cpp
@@ -41,6 +41,7 @@
 #include "ir_optimization.h"
 #include "compiler/glsl_types.h"
 #include "util/hash_table.h"
+#include "util/pointer_map.h"
 
 namespace {
 
@@ -103,8 +104,7 @@ public:
   mem_ctx = ralloc_context(0);
   this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0);
   this->acp = new(mem_ctx) exec_list;
-  this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
+  this->kills = _mesa_pointer_map_create(mem_ctx);
}
~ir_constant_propagation_visitor()
{
@@ -129,10 +129,10 @@ public:
exec_list *acp;
 
/**
-* Hash table of kill_entry: The masks of variables whose values were
+* Pointer map of kill_entry: The masks of variables whose values were
 * killed in this block.
 */
-   hash_table *kills;
+   pointer_map *kills;
 
bool progress;
 
@@ -269,12 +269,11 @@ 
ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
 * main() at link time, so they're irrelevant to us.
 */
exec_list *orig_acp = this->acp;
-   hash_table *orig_kills = this->kills;
+   pointer_map *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
this->acp = new(mem_ctx) exec_list;
-   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+   this->kills = _mesa_pointer_map_create(mem_ctx);
this->killed_all = false;
 
visit_list_elements(this, >body);
@@ -359,12 +358,11 @@ void
 ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
 {
exec_list *orig_acp = this->acp;
-   hash_table *orig_kills = this->kills;
+   pointer_map *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
this->acp = new(mem_ctx) exec_list;
-   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+   this->kills = _mesa_pointer_map_create(mem_ctx);
this->killed_all = false;
 
/* Populate the initial acp with a constant of the original */
@@ -378,14 +376,14 @@ 
ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
   orig_acp->make_empty();
}
 
-   hash_table *new_kills = this->kills;
+   pointer_map *new_kills = this->kills;
this->kills = orig_kills;
this->acp = orig_acp;
this->killed_all = this->killed_all || orig_killed_all;
 
-   hash_entry *htk;
-   hash_table_foreach(new_kills, htk) {
-  kill_entry *k = (kill_entry *) htk->data;
+   map_entry *me;
+   _mesa_pointer_map_foreach(new_kills, me) {
+  kill_entry *k = (kill_entry *) me->data;
   kill(k->var, k->write_mask);
}
 }
@@ -407,7 +405,7 @@ ir_visitor_status
 ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
 {
exec_list *orig_acp = this->acp;
-   hash_table *orig_kills = this->kills;
+   pointer_map *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
/* FINISHME: For now, the initial acp for loops is totally empty.
@@ -415,8 +413,7 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
 * cloned minus the killed entries after the first run through.
 */
this->acp = new(mem_ctx) exec_list;
-   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+   this->kills = _mesa_pointer_map_create(mem_ctx);
this->killed_all = false;
 
visit_list_elements(this, >body_instructions);
@@ -425,14 +422,14 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
   orig_acp->make_empty();
}
 
-   hash_table *new_kills = this->kills;
+   pointer_map *new_kills = this->kills;
this->kills = orig_kills;
this->acp = orig_acp;
this->killed_all = this->killed_all || orig_killed_all;
 
-   hash_entry *htk;
-   hash_table_foreach(new_kills, htk) {
-  kill_entry *k = (kill_entry *) htk->data;
+   map_entry *me;
+   _mesa_pointer_map_foreach(new_kills, me) {
+  kill_entry *k = (kill_entry *) me->data;
   kill(k->var, k->write_mask);
}
 
@@ -461,14 +458,14 @@ ir_constant_propagation_visitor::kill(ir_variable *var, 
unsigned write_mask)
/* Add this writemask of the variable to the hash table of killed
 * variables in this block.
 */
-   hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
-   if (kill_hash_entry) {
-  kill_entry *entry = (kill_entry *) kill_hash_entry->data;
+   map_entry *kill_map_entry = _mesa_pointer_map_search(this->kills, var);
+   if 

[Mesa-dev] [PATCH 00/18] [RFC] Pointer specific data structures

2018-04-11 Thread Thomas Helland
This series came about when I saw a talk online, while simultaneously
being annoyed about the needless waste of memory in our set as reported
by pahole. I have previously made some patches that changed our hash
table from a reprobing one to a quadratic probing one, in the name of
lower overhead and better cache locality, but I was not quite satisfied.

I'm sending this series out now, as it seems like an ideal time since
Timothy is working at reducing our compile times. Further details about 
the implementation and its advantages are described in the patches.
I've found this to give a reduction in shader-db runtime of about 2%,
but I have to do some more testing on my main computer, as my laptop
is showing its age with some terrible thermal issues.

This special-cases pointers, as that is a very common use case.
This allows us to drop some comparisons, and reduce the total size
of our hash table to 70% of our current size and the set to 50%. It uses
linear probing and power-of-two table sizes to get good cache locality.
In the pointer_map case it moves the stored hashes out into its own
array for even better cache locality.

I'm not sure if we want another set and map amongst our utils,
but the patch series is simple enough, and complete enough,
that I thought I could share it for some initial comments.

CC: Timothy Arceri <tarc...@itsqueeze.com>

Thomas Helland (18):
  util: Add initial pointer map implementation
  glsl: Use pointer map in constant propagation
  util: Add a pointer map clone function
  glsl: Port copy propagation elements to pointer map
  glsl: Move ir_variable_refcount to using the pointer map
  nir: Change lower_vars_to_ssa to use pointer map
  glsl: Use pointer map in copy propagation
  glsl: Use pointer map in opt_constant_variable
  glsl: Change glsl_to_nir to user pointer map
  util: Add a call_foreach function to the pointer map
  glsl: Use the pointer map in the glsl linker
  nir: Use pointer map in nir_from_ssa
  util: Add a pointer set implementation
  nir: Migrate lower_vars_to_ssa to use pointer set
  glsl: Use pointer set in opt_copy_propagation
  nir: Use pointer set in remove_dead_variable
  nir: Use pointer_set in nir_propagate_invariant
  util: Just cut the hash in the pointer table

 src/compiler/glsl/glsl_to_nir.cpp  |  31 +-
 src/compiler/glsl/ir_variable_refcount.cpp |  13 +-
 src/compiler/glsl/ir_variable_refcount.h   |   4 +-
 src/compiler/glsl/linker.cpp   |  40 ++-
 src/compiler/glsl/opt_constant_propagation.cpp |  47 ++--
 src/compiler/glsl/opt_constant_variable.cpp|  34 +--
 src/compiler/glsl/opt_copy_propagation.cpp |  95 +++
 .../glsl/opt_copy_propagation_elements.cpp |  96 ---
 src/compiler/glsl/opt_dead_code.cpp|   6 +-
 src/compiler/nir/nir_from_ssa.c|  18 +-
 src/compiler/nir/nir_lower_vars_to_ssa.c   |  48 ++--
 src/compiler/nir/nir_propagate_invariant.c |  33 +--
 src/compiler/nir/nir_remove_dead_variables.c   |  37 +--
 src/util/meson.build   |   4 +
 src/util/pointer_map.c | 313 +
 src/util/pointer_map.h | 110 
 src/util/pointer_set.c | 266 +
 src/util/pointer_set.h |  90 ++
 18 files changed, 1026 insertions(+), 259 deletions(-)
 create mode 100644 src/util/pointer_map.c
 create mode 100644 src/util/pointer_map.h
 create mode 100644 src/util/pointer_set.c
 create mode 100644 src/util/pointer_set.h

-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/18] glsl: Port copy propagation elements to pointer map

2018-04-11 Thread Thomas Helland
---
 .../glsl/opt_copy_propagation_elements.cpp | 96 +++---
 1 file changed, 47 insertions(+), 49 deletions(-)

diff --git a/src/compiler/glsl/opt_copy_propagation_elements.cpp 
b/src/compiler/glsl/opt_copy_propagation_elements.cpp
index 8bae424a1d..8737fe27a5 100644
--- a/src/compiler/glsl/opt_copy_propagation_elements.cpp
+++ b/src/compiler/glsl/opt_copy_propagation_elements.cpp
@@ -46,7 +46,7 @@
 #include "ir_basic_block.h"
 #include "ir_optimization.h"
 #include "compiler/glsl_types.h"
-#include "util/hash_table.h"
+#include "util/pointer_map.h"
 
 static bool debug = false;
 
@@ -124,24 +124,22 @@ public:
   ralloc_free(mem_ctx);
}
 
-   void clone_acp(hash_table *lhs, hash_table *rhs)
+   void clone_acp(pointer_map *lhs, pointer_map *rhs)
{
-  lhs_ht = _mesa_hash_table_clone(lhs, mem_ctx);
-  rhs_ht = _mesa_hash_table_clone(rhs, mem_ctx);
+  lhs_pm = _mesa_pointer_map_clone(lhs, mem_ctx);
+  rhs_pm = _mesa_pointer_map_clone(rhs, mem_ctx);
}
 
void create_acp()
{
-  lhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
-   _mesa_key_pointer_equal);
-  rhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
-   _mesa_key_pointer_equal);
+  lhs_pm = _mesa_pointer_map_create(mem_ctx);
+  rhs_pm = _mesa_pointer_map_create(mem_ctx);
}
 
void destroy_acp()
{
-  _mesa_hash_table_destroy(lhs_ht, NULL);
-  _mesa_hash_table_destroy(rhs_ht, NULL);
+  _mesa_pointer_map_destroy(lhs_pm, NULL);
+  _mesa_pointer_map_destroy(rhs_pm, NULL);
}
 
void handle_loop(ir_loop *, bool keep_acp);
@@ -159,8 +157,8 @@ public:
void handle_if_block(exec_list *instructions);
 
/** Hash of acp_entry: The available copies to propagate */
-   hash_table *lhs_ht;
-   hash_table *rhs_ht;
+   pointer_map *lhs_pm;
+   pointer_map *rhs_pm;
 
/**
 * List of kill_entry: The variables whose values were killed in this
@@ -191,8 +189,8 @@ 
ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
exec_list *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
-   hash_table *orig_lhs_ht = lhs_ht;
-   hash_table *orig_rhs_ht = rhs_ht;
+   pointer_map *orig_lhs_pm = lhs_pm;
+   pointer_map *orig_rhs_pm = rhs_pm;
 
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
@@ -208,8 +206,8 @@ 
ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
this->kills = orig_kills;
this->killed_all = orig_killed_all;
 
-   lhs_ht = orig_lhs_ht;
-   rhs_ht = orig_rhs_ht;
+   lhs_pm = orig_lhs_pm;
+   rhs_pm = orig_rhs_pm;
 
return visit_continue_with_parent;
 }
@@ -296,9 +294,9 @@ 
ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
/* Try to find ACP entries covering swizzle_chan[], hoping they're
 * the same source variable.
 */
-   hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, var);
-   if (ht_entry) {
-  exec_list *ht_list = (exec_list *) ht_entry->data;
+   map_entry *pm_entry = _mesa_pointer_map_search(lhs_pm, var);
+   if (pm_entry) {
+  exec_list *ht_list = (exec_list *) pm_entry->data;
   foreach_in_list(acp_entry, entry, ht_list) {
  for (int c = 0; c < chans; c++) {
 if (entry->write_mask & (1 << swizzle_chan[c])) {
@@ -368,8 +366,8 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_call 
*ir)
/* Since we're unlinked, we don't (necessarily) know the side effects of
 * this call.  So kill all copies.
 */
-   _mesa_hash_table_clear(lhs_ht, NULL);
-   _mesa_hash_table_clear(rhs_ht, NULL);
+   _mesa_pointer_map_clear(lhs_pm);
+   _mesa_pointer_map_clear(rhs_pm);
 
this->killed_all = true;
 
@@ -382,20 +380,20 @@ 
ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
exec_list *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
-   hash_table *orig_lhs_ht = lhs_ht;
-   hash_table *orig_rhs_ht = rhs_ht;
+   pointer_map *orig_lhs_pm = lhs_pm;
+   pointer_map *orig_rhs_pm = rhs_pm;
 
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
 
/* Populate the initial acp with a copy of the original */
-   clone_acp(orig_lhs_ht, orig_rhs_ht);
+   clone_acp(orig_lhs_pm, orig_rhs_pm);
 
visit_list_elements(this, instructions);
 
if (this->killed_all) {
-  _mesa_hash_table_clear(orig_lhs_ht, NULL);
-  _mesa_hash_table_clear(orig_rhs_ht, NULL);
+  _mesa_pointer_map_clear(orig_lhs_pm);
+  _mesa_pointer_map_clear(orig_rhs_pm);
}
 
exec_list *new_kills = this->kills;
@@ -404,8 +402,8 @@ 
ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
 
destroy_acp();
 
-   lhs_ht = orig_lhs_ht;
-   rhs_ht = orig_rhs_ht;
+   lhs_pm = orig_lhs_pm;
+   rhs_pm = orig_rhs_pm;
 
/* Move the new kills into the parent block's list, removing them
 * from 

Re: [Mesa-dev] [PATCH] nir: Check if u_vector_init() succeeds

2018-04-05 Thread Thomas Helland
Sorry about that. It doesn't hurt to check.

Reviewed-by: Thomas Helland <thomashellan...@gmail.com>

2018-04-05 9:35 GMT+02:00 Topi Pohjolainen <topi.pohjolai...@gmail.com>:
> However, it only fails when running out of memory. Now, if we
> are about to check that, we should be consistent and check
> the allocation of the worklist as well.
>
> On the other hand there are other places where we don't check
> for allocation failures. Therefore I'm not sure if we bother
> here either. Coverity complains but I can mark it as ignored
> the same.
>
> CID: 1433512
> Fixes: edb18564c7 nir: Initial implementation of a nir_instr_worklist
> CC: Thomas Helland <thomashellan...@gmail.com>
> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
> ---
>  src/compiler/nir/nir_worklist.h | 11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h
> index e376908..3fb391f 100644
> --- a/src/compiler/nir/nir_worklist.h
> +++ b/src/compiler/nir/nir_worklist.h
> @@ -105,8 +105,15 @@ typedef struct {
>  static inline nir_instr_worklist *
>  nir_instr_worklist_create() {
> nir_instr_worklist *wl = malloc(sizeof(nir_instr_worklist));
> -   u_vector_init(>instr_vec, sizeof(struct nir_instr *),
> - sizeof(struct nir_instr *) * 8);
> +   if (!wl)
> +  return NULL;
> +
> +   if (!u_vector_init(>instr_vec, sizeof(struct nir_instr *),
> +  sizeof(struct nir_instr *) * 8)) {
> +  free(wl);
> +  return NULL;
> +   }
> +
> return wl;
>  }
>
> --
> 2.7.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nir: Initial implementation of a nir_instr_worklist

2018-03-17 Thread Thomas Helland
Make a simple worklist by basically just wrapping u_vector.
This is intended to be used in nir_opt_dce to reduce the number of calls
to ralloc, as we are currently spamming ralloc quite badly. It should
also give better cache locality and much lower memory usage.
---
 src/compiler/nir/nir_worklist.h | 69 +
 1 file changed, 69 insertions(+)

diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h
index 39521a386c..5071c7aec1 100644
--- a/src/compiler/nir/nir_worklist.h
+++ b/src/compiler/nir/nir_worklist.h
@@ -30,6 +30,8 @@
 #define _NIR_WORKLIST_
 
 #include "nir.h"
+#include "util/set.h"
+#include "util/u_vector.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -83,6 +85,73 @@ nir_block *nir_block_worklist_peek_tail(const 
nir_block_worklist *w);
 
 nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w);
 
+
+
+
+/*
+ * This worklist implementation, in contrast to the block worklist, does not
+ * have unique entries, meaning a nir_instr can be inserted more than once
+ * into the worklist. It uses u_vector to keep the overhead and memory
+ * footprint at a minimum.
+ * 
+ * Making it unique by using a set was tested, but for the single usecase
+ * (nir_opt_dce) it did not improve speed. There we check the pass_flag bit
+ * and abort immediately if there's nothing to do, so the added overhead of
+ * the set was higher than just processing the few extra entries.
+ */
+
+typedef struct {
+   struct u_vector instr_vec;
+} nir_instr_worklist;
+
+static inline nir_instr_worklist *
+nir_instr_worklist_create() {
+   nir_instr_worklist *wl = malloc(sizeof(nir_instr_worklist));
+   u_vector_init(>instr_vec, sizeof(struct nir_instr *),
+ sizeof(struct nir_instr *) * 8);
+   return wl;
+}
+
+static inline uint32_t
+nir_instr_worklist_length(nir_instr_worklist *wl)
+{
+   return u_vector_length(>instr_vec);
+}
+
+static inline bool
+nir_instr_worklist_empty(nir_instr_worklist *wl)
+{
+   return nir_instr_worklist_length(wl) == 0;
+}
+
+static inline void
+nir_instr_worklist_destroy(nir_instr_worklist *wl)
+{
+   u_vector_finish(>instr_vec);
+   free(wl);
+}
+
+static inline void
+nir_instr_worklist_push_tail(nir_instr_worklist *wl, nir_instr *instr)
+{
+   struct nir_instr **vec_instr = u_vector_add(>instr_vec);
+   *vec_instr = instr;
+}
+
+static inline nir_instr *
+nir_instr_worklist_pop_head(nir_instr_worklist *wl)
+{
+   struct nir_instr **vec_instr = u_vector_remove(>instr_vec);
+
+   if (vec_instr == NULL)
+  return NULL;
+
+   return *vec_instr;
+}
+
+#define nir_instr_worklist_foreach(wl, instr)\
+   while ((instr = nir_instr_worklist_pop_head(wl)))
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] nir: Migrate nir_dce to instr worklist

2018-03-17 Thread Thomas Helland
Shader-db runtime change, average of five runs:
   Before 125,77 seconds (+/- 0,09%)
   After  124,48 seconds (+/- 0,07%)
---
 src/compiler/nir/nir_opt_dce.c  | 53 ++---
 src/compiler/nir/nir_worklist.h |  1 +
 2 files changed, 19 insertions(+), 35 deletions(-)

diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
index 5cefba3a72..570e43000c 100644
--- a/src/compiler/nir/nir_opt_dce.c
+++ b/src/compiler/nir/nir_opt_dce.c
@@ -26,45 +26,30 @@
  */
 
 #include "nir.h"
+#include "nir_worklist.h"
 
 /* SSA-based mark-and-sweep dead code elimination */
 
-typedef struct {
-   struct exec_node node;
-   nir_instr *instr;
-} worklist_elem;
-
 static void
-worklist_push(struct exec_list *worklist, nir_instr *instr)
+mark_and_push(nir_instr_worklist *wl, nir_instr *instr)
 {
-   worklist_elem *elem = ralloc(worklist, worklist_elem);
-   elem->instr = instr;
+   nir_instr_worklist_push_tail(wl, instr);
instr->pass_flags = 1;
-   exec_list_push_tail(worklist, >node);
-}
-
-static nir_instr *
-worklist_pop(struct exec_list *worklist)
-{
-   struct exec_node *node = exec_list_pop_head(worklist);
-   worklist_elem *elem = exec_node_data(worklist_elem, node, node);
-   return elem->instr;
 }
 
 static bool
 mark_live_cb(nir_src *src, void *_state)
 {
-   struct exec_list *worklist = (struct exec_list *) _state;
+   nir_instr_worklist *worklist = (nir_instr_worklist *) _state;
 
-   if (src->is_ssa && !src->ssa->parent_instr->pass_flags) {
-  worklist_push(worklist, src->ssa->parent_instr);
-   }
+   if (src->is_ssa && !src->ssa->parent_instr->pass_flags)
+  mark_and_push(worklist, src->ssa->parent_instr);
 
return true;
 }
 
 static void
-init_instr(nir_instr *instr, struct exec_list *worklist)
+init_instr(nir_instr *instr, nir_instr_worklist *worklist)
 {
nir_alu_instr *alu_instr;
nir_intrinsic_instr *intrin_instr;
@@ -79,13 +64,13 @@ init_instr(nir_instr *instr, struct exec_list *worklist)
switch (instr->type) {
case nir_instr_type_call:
case nir_instr_type_jump:
-  worklist_push(worklist, instr);
+  mark_and_push(worklist, instr);
   break;
 
case nir_instr_type_alu:
   alu_instr = nir_instr_as_alu(instr);
   if (!alu_instr->dest.dest.is_ssa)
- worklist_push(worklist, instr);
+ mark_and_push(worklist, instr);
   break;
 
case nir_instr_type_intrinsic:
@@ -94,17 +79,17 @@ init_instr(nir_instr *instr, struct exec_list *worklist)
   NIR_INTRINSIC_CAN_ELIMINATE) {
  if (nir_intrinsic_infos[intrin_instr->intrinsic].has_dest &&
  !intrin_instr->dest.is_ssa) {
-worklist_push(worklist, instr);
+mark_and_push(worklist, instr);
  }
   } else {
- worklist_push(worklist, instr);
+ mark_and_push(worklist, instr);
   }
   break;
 
case nir_instr_type_tex:
   tex_instr = nir_instr_as_tex(instr);
   if (!tex_instr->dest.is_ssa)
- worklist_push(worklist, instr);
+ mark_and_push(worklist, instr);
   break;
 
default:
@@ -113,7 +98,7 @@ init_instr(nir_instr *instr, struct exec_list *worklist)
 }
 
 static bool
-init_block(nir_block *block, struct exec_list *worklist)
+init_block(nir_block *block, nir_instr_worklist *worklist)
 {
nir_foreach_instr(instr, block)
   init_instr(instr, worklist);
@@ -122,7 +107,7 @@ init_block(nir_block *block, struct exec_list *worklist)
if (following_if) {
   if (following_if->condition.is_ssa &&
   !following_if->condition.ssa->parent_instr->pass_flags)
- worklist_push(worklist, following_if->condition.ssa->parent_instr);
+ mark_and_push(worklist, following_if->condition.ssa->parent_instr);
}
 
return true;
@@ -131,19 +116,17 @@ init_block(nir_block *block, struct exec_list *worklist)
 static bool
 nir_opt_dce_impl(nir_function_impl *impl)
 {
-   struct exec_list *worklist = rzalloc(NULL, struct exec_list);
-   exec_list_make_empty(worklist);
+   nir_instr_worklist *worklist = nir_instr_worklist_create();
 
nir_foreach_block(block, impl) {
   init_block(block, worklist);
}
 
-   while (!exec_list_is_empty(worklist)) {
-  nir_instr *instr = worklist_pop(worklist);
+   nir_instr *instr = NULL;
+   nir_instr_worklist_foreach(worklist, instr)
   nir_foreach_src(instr, mark_live_cb, worklist);
-   }
 
-   ralloc_free(worklist);
+   nir_instr_worklist_destroy(worklist);
 
bool progress = false;
 
diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h
index 5071c7aec1..5eeaca1795 100644
--- a/src/compiler/nir/nir_worklist.h
+++ b/src/compiler/nir/nir_worklist.h
@@ -85,6 +85,7 @@ nir_block *nir_block_worklist_peek_tail(const 
nir_block_worklist *w);
 
 nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w);
 
+typedef struct nir_instr_worklist_node nir_instr_worklist_node;
 
 
 
-- 
2.16.2

___
mesa-dev 

[Mesa-dev] [PATCH 0/2] nir_instr_worklist for nir_opt_dce V2

2018-03-17 Thread Thomas Helland
This is take two on reducing ralloc overhead in nir_opt_dce.
I've ditched the previous solution with a freelist, and instead
gone for a wrapper on u_vector. That should remove the need for a 
freelist altogether, and at the same time lower our memory usage.

CC: Eric Anholt <e...@anholt.net>

Thomas Helland (2):
  nir: Initial implementation of a nir_instr_worklist
  nir: Migrate nir_dce to instr worklist

 src/compiler/nir/nir_opt_dce.c  | 53 +++
 src/compiler/nir/nir_worklist.h | 70 +
 2 files changed, 88 insertions(+), 35 deletions(-)

-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc

2018-03-15 Thread Thomas Helland
Yup, most definitely. I just have one more thing to test before
sending out a V2. I've toyed around with arrays and sets and
stuff to see if there are better options than a linked list.
At least for now the answer is: "no, there isn't", but I'm gonna
test u_vector for this use later today to see if that is even better.
Expect new patch this evening CET.

2018-03-14 20:58 GMT+01:00 Dieter Nützel <die...@nuetzel-hh.de>:
> Hello Thomas,
>
> is this useful even after '[Mesa-dev] [PATCH 0/2] V2: Use hash table cloning
> in copy propagation' landed?
>
> I've running both together with Dave's '[Mesa-dev] [PATCH] radv/winsys:
> replace bo list searchs with a hash table.' patch.
>
> Dieter
>
>
> Am 24.01.2018 08:33, schrieb Thomas Helland:
>>
>> 2018-01-21 23:58 GMT+01:00 Eric Anholt <e...@anholt.net>:
>>>
>>> Thomas Helland <thomashellan...@gmail.com> writes:
>>>
>>>> Also, allocate worklist_elem in groups of 20, to reduce the burden of
>>>> allocation. Do not use rzalloc, as there is no need. This lets us drop
>>>> the number of calls to ralloc from approximately 10% of all calls to
>>>> ralloc(130 000 calls), down to a mere 2000 calls to ralloc_array_size.
>>>> This cuts the runtime of shader-db by 1%, while at the same time
>>>> reducing the number of stalled cycles, executed cycles, and executed
>>>> instructions by about 1 % as reported by perf. I did a five-run
>>>> benchmark pre and post and got a statistical variance less than 0.1% pre
>>>> and post. This was with i965's ir validation polluting the benchmark, so
>>>> the numbers are even better in release builds.
>>>>
>>>> Performance change as found with perf-diff:
>>>> 4.74% -0.23%  libc-2.26.so[.] _int_malloc
>>>> 1.88% -0.21%  libc-2.26.so[.] malloc
>>>> 2.27% +0.16%  libmesa_dri_drivers.so  [.] match_value.part.7
>>>> 2.95% -0.12%  libc-2.26.so[.] _int_free
>>>>   +0.11%  libmesa_dri_drivers.so  [.] worklist_push
>>>> 1.22% -0.08%  libc-2.26.so[.] malloc_consolidate
>>>> 0.16% -0.06%  libmesa_dri_drivers.so  [.] mark_live_cb
>>>> 1.21% +0.06%  libmesa_dri_drivers.so  [.] match_expression.part.6
>>>> 0.75% -0.05%  libc-2.26.so[.] cfree@GLIBC_2.2.5
>>>> 0.50% -0.05%  libmesa_dri_drivers.so  [.] ralloc_size
>>>> 0.57% +0.04%  libmesa_dri_drivers.so  [.] nir_replace_instr
>>>> 1.29% -0.04%  libmesa_dri_drivers.so  [.] unsafe_free
>>>
>>>
>>> I'm curious, since a NIR instruction worklist seems like a generally
>>> useful thing to have:
>>>
>>> Could nir_worklist.c keep the implementation of this?
>>>
>>> Also, I wonder if it wouldn't be even better to have a u_dynarray of
>>> instructions in the worklist, with push/pop on the end of the array, and
>>> a struct set tracking the instructions in the array to avoid
>>> double-adding.  I actually don't know if that would be better or not, so
>>> I'd be happy with the worklist management just moved to nir_worklist.c.
>>
>>
>> I'll look into this to see what I can do. nir_worklist.c at this time has
>> only
>> a block worklist. This numbers all the blocks, uses a bitset for checking
>> if the item is present, and uses an array with an index pointing to the
>> start of the queue of blocks in the buffer.
>>
>> The same scheme could be easily used for ssa-defs, as these are
>> also numbered. I actually did this for the VRP pass I wrote years ago.
>>
>> However, for instructions we do not have a way of numbering them,
>> so a different scheme would have to be used. A dynarray + set type
>> of thing, as you're suggesting, might get us where we want.
>> I'll see what I can come up with.
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] V2: Use hash table cloning in copy propagation

2018-03-14 Thread Thomas Helland
2018-03-14 8:13 GMT+01:00 Dave Airlie <airl...@gmail.com>:
> On 14 March 2018 at 09:22, Thomas Helland <thomashellan...@gmail.com> wrote:
>> Sending out a v2 just in case someone has any more comments.
>> If not I'll probably push these tomorrow, or thursday.
>>
>> This is a revival of some old patches I had around to improve
>> the compile times in the glsl compiler by reducing the time
>> spent inserting items in the hash table in opt_copy_propagation.
>> I've only rebased this, as my system doesn't even want to compile
>> anything right now. I also don't remember if it was thoroughly
>> tested, so that will have to be done. Sending it out as Dave
>> might be interested in this to mitigate some of the overhead
>> his soft-double implementation incurs.
>>
>> Thomas Helland (2):
>>   util: Implement a hash table cloning function
>>   glsl: Use hash table cloning in copy propagation
>
> It definitely reduces the overheads in my tests, copy prop is no longer
> a major thing
> in my perf results if I apply these.
>
> Dave.

That's awesome! I'll push this later this evening then =)
I've more patches in the works that could help you here,
I'll see if I can get those sent to the list later this evening.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] util: Implement a hash table cloning function

2018-03-13 Thread Thomas Helland
V2: Don't rzalloc; we are about to rewrite the whole thing (Vladislav)

Reviewed-by: Eric Anholt 
---
 src/util/hash_table.c | 22 ++
 src/util/hash_table.h |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index b7421a0144..f8d5d0f88a 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -141,6 +141,28 @@ _mesa_hash_table_create(void *mem_ctx,
return ht;
 }
 
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx)
+{
+   struct hash_table *ht;
+
+   ht = ralloc(dst_mem_ctx, struct hash_table);
+   if (ht == NULL)
+  return NULL;
+
+   memcpy(ht, src, sizeof(struct hash_table));
+
+   ht->table = ralloc_array(ht, struct hash_entry, ht->size);
+   if (ht->table == NULL) {
+  ralloc_free(ht);
+  return NULL;
+   }
+
+   memcpy(ht->table, src->table, ht->size * sizeof(struct hash_entry));
+
+   return ht;
+}
+
 /**
  * Frees the given hash table.
  *
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index d3e0758b26..3846dad4b4 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -62,6 +62,8 @@ _mesa_hash_table_create(void *mem_ctx,
 uint32_t (*key_hash_function)(const void *key),
 bool (*key_equals_function)(const void *a,
 const void *b));
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx);
 void _mesa_hash_table_destroy(struct hash_table *ht,
   void (*delete_function)(struct hash_entry 
*entry));
 void _mesa_hash_table_clear(struct hash_table *ht,
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] V2: Use hash table cloning in copy propagation

2018-03-13 Thread Thomas Helland
Sending out a v2 just in case someone has any more comments.
If not I'll probably push these tomorrow, or thursday.

This is a revival of some old patches I had around to improve
the compile times in the glsl compiler by reducing the time
spent inserting items in the hash table in opt_copy_propagation.
I've only rebased this, as my system doesn't even want to compile
anything right now. I also don't remember if it was thoroughly
tested, so that will have to be done. Sending it out as Dave
might be interested in this to mitigate some of the overhead
his soft-double implementation incurs.

Thomas Helland (2):
  util: Implement a hash table cloning function
  glsl: Use hash table cloning in copy propagation

 src/compiler/glsl/opt_copy_propagation.cpp | 17 -
 .../glsl/opt_copy_propagation_elements.cpp | 29 --
 src/util/hash_table.c  | 22 
 src/util/hash_table.h  |  2 ++
 4 files changed, 39 insertions(+), 31 deletions(-)

-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] glsl: Use hash table cloning in copy propagation

2018-03-13 Thread Thomas Helland
Walking the whole hash table, inserting entries by hashing them first
is just a really bad idea. We can simply memcpy the whole thing.

While this does not have a major performance impact on average,
as it only helps shaders with a lot of branches, it might help
individual shaders quite a lot. For my shader-db I get a reduction
from 1'381 (+-0,03%) to 1'272 (+-0,03%) billion cycles on five runs,
as reported by "perf stat".

V2: Remove leftover creation of acp in two places

Reviewed-by: Eric Anholt 
---
 src/compiler/glsl/opt_copy_propagation.cpp | 17 -
 .../glsl/opt_copy_propagation_elements.cpp | 29 --
 2 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/src/compiler/glsl/opt_copy_propagation.cpp 
b/src/compiler/glsl/opt_copy_propagation.cpp
index e904e6ede4..6220aa86da 100644
--- a/src/compiler/glsl/opt_copy_propagation.cpp
+++ b/src/compiler/glsl/opt_copy_propagation.cpp
@@ -213,17 +213,12 @@ ir_copy_propagation_visitor::handle_if_block(exec_list 
*instructions)
set *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
-   acp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
kills = _mesa_set_create(NULL, _mesa_hash_pointer,
 _mesa_key_pointer_equal);
this->killed_all = false;
 
/* Populate the initial acp with a copy of the original */
-   struct hash_entry *entry;
-   hash_table_foreach(orig_acp, entry) {
-  _mesa_hash_table_insert(acp, entry->key, entry->data);
-   }
+   acp = _mesa_hash_table_clone(orig_acp, NULL);
 
visit_list_elements(this, instructions);
 
@@ -264,17 +259,15 @@ ir_copy_propagation_visitor::handle_loop(ir_loop *ir, 
bool keep_acp)
set *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
 
-   acp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
kills = _mesa_set_create(NULL, _mesa_hash_pointer,
 _mesa_key_pointer_equal);
this->killed_all = false;
 
if (keep_acp) {
-  struct hash_entry *entry;
-  hash_table_foreach(orig_acp, entry) {
- _mesa_hash_table_insert(acp, entry->key, entry->data);
-  }
+  acp = _mesa_hash_table_clone(orig_acp, NULL);
+   } else {
+  acp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+_mesa_key_pointer_equal);
}
 
visit_list_elements(this, &ir->body_instructions);
diff --git a/src/compiler/glsl/opt_copy_propagation_elements.cpp 
b/src/compiler/glsl/opt_copy_propagation_elements.cpp
index 9f79fa9202..8bae424a1d 100644
--- a/src/compiler/glsl/opt_copy_propagation_elements.cpp
+++ b/src/compiler/glsl/opt_copy_propagation_elements.cpp
@@ -124,6 +124,12 @@ public:
   ralloc_free(mem_ctx);
}
 
+   void clone_acp(hash_table *lhs, hash_table *rhs)
+   {
+  lhs_ht = _mesa_hash_table_clone(lhs, mem_ctx);
+  rhs_ht = _mesa_hash_table_clone(rhs, mem_ctx);
+   }
+
void create_acp()
{
   lhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
@@ -138,19 +144,6 @@ public:
   _mesa_hash_table_destroy(rhs_ht, NULL);
}
 
-   void populate_acp(hash_table *lhs, hash_table *rhs)
-   {
-  struct hash_entry *entry;
-
-  hash_table_foreach(lhs, entry) {
- _mesa_hash_table_insert(lhs_ht, entry->key, entry->data);
-  }
-
-  hash_table_foreach(rhs, entry) {
- _mesa_hash_table_insert(rhs_ht, entry->key, entry->data);
-  }
-   }
-
void handle_loop(ir_loop *, bool keep_acp);
virtual ir_visitor_status visit_enter(class ir_loop *);
virtual ir_visitor_status visit_enter(class ir_function_signature *);
@@ -395,10 +388,8 @@ 
ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
 
-   create_acp();
-
/* Populate the initial acp with a copy of the original */
-   populate_acp(orig_lhs_ht, orig_rhs_ht);
+   clone_acp(orig_lhs_ht, orig_rhs_ht);
 
visit_list_elements(this, instructions);
 
@@ -454,11 +445,11 @@ ir_copy_propagation_elements_visitor::handle_loop(ir_loop 
*ir, bool keep_acp)
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
 
-   create_acp();
-
if (keep_acp) {
   /* Populate the initial acp with a copy of the original */
-  populate_acp(orig_lhs_ht, orig_rhs_ht);
+  clone_acp(orig_lhs_ht, orig_rhs_ht);
+   } else {
+  create_acp();
}
 
visit_list_elements(this, &ir->body_instructions);
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] util: Implement a hash table cloning function

2018-03-13 Thread Thomas Helland
2018-03-12 19:48 GMT+01:00 Emil Velikov <emil.l.veli...@gmail.com>:
> Hi Thomas,
>
> On 12 March 2018 at 17:55, Thomas Helland <thomashellan...@gmail.com> wrote:
>> V2: Don't rzalloc; we are about to rewrite the whole thing (Vladislav)
>> ---
>>  src/util/hash_table.c | 22 ++
>>  src/util/hash_table.h |  2 ++
>>  2 files changed, 24 insertions(+)
>>
>> diff --git a/src/util/hash_table.c b/src/util/hash_table.c
>> index b7421a0144..f8d5d0f88a 100644
>> --- a/src/util/hash_table.c
>> +++ b/src/util/hash_table.c
>> @@ -141,6 +141,28 @@ _mesa_hash_table_create(void *mem_ctx,
>> return ht;
>>  }
>>
>> +struct hash_table *
>> +_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx)
>> +{
>> +   struct hash_table *ht;
>> +
>> +   ht = ralloc(dst_mem_ctx, struct hash_table);
>> +   if (ht == NULL)
>> +  return NULL;
>> +
>> +   memcpy(ht, src, sizeof(struct hash_table));
>> +
>> +   ht->table = ralloc_array(ht, struct hash_entry, ht->size);
>> +   if (ht->table == NULL) {
>> +  ralloc_free(ht);
>> +  return NULL;
>> +   }
>> +
>> +   memcpy(ht->table, src->table, ht->size * sizeof(struct hash_entry));
>> +
> Thinking out loud:
>
> I'm wondering if it won't make sense to reuse _mesa_hash_table_create,
> instead of open-coding it?
>
> -Emil

That won't work like you might expect. The hash table will then be initialized
to the wrong size. We want an exact copy, so we have to make sure we also
copy things like size, deleted_entries, etc. If we don't, we cannot memcpy
the array of entries, defeating the main purpose, which is to avoid all the
insertions into the hash table.

-Thomas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] Hash table cloning for copy propagation

2018-03-12 Thread Thomas Helland
I've also uploaded this series to my github, if you want to
pull them down from there [1].

I've also uploaded my previously talked about pointer_map
to my github account [2]. There's a pointer map, pointer set,
and some patches for nir in there, and some for disabling
asserts in some places. So it's not ready for primetime,
but that series has been tested recently, and has been
stable for a couple months now. Been tinkering with it
and adding small pieces now and then. What remains is
a bench-a-tonne to ensure it is OK performance wise,
and cleaning it up for posting on the mailing list.

[1]: https://github.com/thohel/mesa/commits/hash-table-clone
[2]: https://github.com/thohel/mesa/commits/pointer_map

2018-03-12 18:55 GMT+01:00 Thomas Helland <thomashellan...@gmail.com>:
> This is a revival of some old patches I had around to improve
> the compile times in the glsl compiler by reducing the time
> spent inserting items in the hash table in opt_copy_propagation.
> I've only rebased this, as my system doesn't even want to compile
> anything right now. I also don't remember if it was thoroughly
> tested, so that will have to be done. Sending it out as Dave
> might be interested in this to mitigate some of the overhead
> his soft-double implementation incurs.
>
> CC: Dave Airlie <airl...@gmail.com>
>
> Thomas Helland (2):
>   util: Implement a hash table cloning function
>   glsl: Use hash table cloning in copy propagation
>
>  src/compiler/glsl/opt_copy_propagation.cpp | 13 --
>  .../glsl/opt_copy_propagation_elements.cpp | 29 
> --
>  src/util/hash_table.c  | 22 
>  src/util/hash_table.h  |  2 ++
>  4 files changed, 39 insertions(+), 27 deletions(-)
>
> --
> 2.15.1
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] glsl: Use hash table cloning in copy propagation

2018-03-12 Thread Thomas Helland
Walking the whole hash table, inserting entries by hashing them first
is just a really bad idea. We can simply memcpy the whole thing.
---
 src/compiler/glsl/opt_copy_propagation.cpp | 13 --
 .../glsl/opt_copy_propagation_elements.cpp | 29 --
 2 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/src/compiler/glsl/opt_copy_propagation.cpp 
b/src/compiler/glsl/opt_copy_propagation.cpp
index e904e6ede4..96667779da 100644
--- a/src/compiler/glsl/opt_copy_propagation.cpp
+++ b/src/compiler/glsl/opt_copy_propagation.cpp
@@ -220,10 +220,7 @@ ir_copy_propagation_visitor::handle_if_block(exec_list 
*instructions)
this->killed_all = false;
 
/* Populate the initial acp with a copy of the original */
-   struct hash_entry *entry;
-   hash_table_foreach(orig_acp, entry) {
-  _mesa_hash_table_insert(acp, entry->key, entry->data);
-   }
+   acp = _mesa_hash_table_clone(orig_acp, NULL);
 
visit_list_elements(this, instructions);
 
@@ -271,10 +268,10 @@ ir_copy_propagation_visitor::handle_loop(ir_loop *ir, 
bool keep_acp)
this->killed_all = false;
 
if (keep_acp) {
-  struct hash_entry *entry;
-  hash_table_foreach(orig_acp, entry) {
- _mesa_hash_table_insert(acp, entry->key, entry->data);
-  }
+  acp = _mesa_hash_table_clone(orig_acp, NULL);
+   } else {
+  acp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+_mesa_key_pointer_equal);
}
 
visit_list_elements(this, &ir->body_instructions);
diff --git a/src/compiler/glsl/opt_copy_propagation_elements.cpp 
b/src/compiler/glsl/opt_copy_propagation_elements.cpp
index 9f79fa9202..8bae424a1d 100644
--- a/src/compiler/glsl/opt_copy_propagation_elements.cpp
+++ b/src/compiler/glsl/opt_copy_propagation_elements.cpp
@@ -124,6 +124,12 @@ public:
   ralloc_free(mem_ctx);
}
 
+   void clone_acp(hash_table *lhs, hash_table *rhs)
+   {
+  lhs_ht = _mesa_hash_table_clone(lhs, mem_ctx);
+  rhs_ht = _mesa_hash_table_clone(rhs, mem_ctx);
+   }
+
void create_acp()
{
   lhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
@@ -138,19 +144,6 @@ public:
   _mesa_hash_table_destroy(rhs_ht, NULL);
}
 
-   void populate_acp(hash_table *lhs, hash_table *rhs)
-   {
-  struct hash_entry *entry;
-
-  hash_table_foreach(lhs, entry) {
- _mesa_hash_table_insert(lhs_ht, entry->key, entry->data);
-  }
-
-  hash_table_foreach(rhs, entry) {
- _mesa_hash_table_insert(rhs_ht, entry->key, entry->data);
-  }
-   }
-
void handle_loop(ir_loop *, bool keep_acp);
virtual ir_visitor_status visit_enter(class ir_loop *);
virtual ir_visitor_status visit_enter(class ir_function_signature *);
@@ -395,10 +388,8 @@ 
ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
 
-   create_acp();
-
/* Populate the initial acp with a copy of the original */
-   populate_acp(orig_lhs_ht, orig_rhs_ht);
+   clone_acp(orig_lhs_ht, orig_rhs_ht);
 
visit_list_elements(this, instructions);
 
@@ -454,11 +445,11 @@ ir_copy_propagation_elements_visitor::handle_loop(ir_loop 
*ir, bool keep_acp)
this->kills = new(mem_ctx) exec_list;
this->killed_all = false;
 
-   create_acp();
-
if (keep_acp) {
   /* Populate the initial acp with a copy of the original */
-  populate_acp(orig_lhs_ht, orig_rhs_ht);
+  clone_acp(orig_lhs_ht, orig_rhs_ht);
+   } else {
+  create_acp();
}
 
visit_list_elements(this, &ir->body_instructions);
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] util: Implement a hash table cloning function

2018-03-12 Thread Thomas Helland
V2: Don't rzalloc; we are about to rewrite the whole thing (Vladislav)
---
 src/util/hash_table.c | 22 ++
 src/util/hash_table.h |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index b7421a0144..f8d5d0f88a 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -141,6 +141,28 @@ _mesa_hash_table_create(void *mem_ctx,
return ht;
 }
 
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx)
+{
+   struct hash_table *ht;
+
+   ht = ralloc(dst_mem_ctx, struct hash_table);
+   if (ht == NULL)
+  return NULL;
+
+   memcpy(ht, src, sizeof(struct hash_table));
+
+   ht->table = ralloc_array(ht, struct hash_entry, ht->size);
+   if (ht->table == NULL) {
+  ralloc_free(ht);
+  return NULL;
+   }
+
+   memcpy(ht->table, src->table, ht->size * sizeof(struct hash_entry));
+
+   return ht;
+}
+
 /**
  * Frees the given hash table.
  *
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index d3e0758b26..3846dad4b4 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -62,6 +62,8 @@ _mesa_hash_table_create(void *mem_ctx,
 uint32_t (*key_hash_function)(const void *key),
 bool (*key_equals_function)(const void *a,
 const void *b));
+struct hash_table *
+_mesa_hash_table_clone(struct hash_table *src, void *dst_mem_ctx);
 void _mesa_hash_table_destroy(struct hash_table *ht,
   void (*delete_function)(struct hash_entry 
*entry));
 void _mesa_hash_table_clear(struct hash_table *ht,
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] Hash table cloning for copy propagation

2018-03-12 Thread Thomas Helland
This is a revival of some old patches I had around to improve
the compile times in the glsl compiler by reducing the time
spent inserting items in the hash table in opt_copy_propagation.
I've only rebased this, as my system doesn't even want to compile
anything right now. I also don't remember if it was thoroughly
tested, so that will have to be done. Sending it out as Dave
might be interested in this to mitigate some of the overhead
his soft-double implementation incurs.

CC: Dave Airlie <airl...@gmail.com>

Thomas Helland (2):
  util: Implement a hash table cloning function
  glsl: Use hash table cloning in copy propagation

 src/compiler/glsl/opt_copy_propagation.cpp | 13 --
 .../glsl/opt_copy_propagation_elements.cpp | 29 --
 src/util/hash_table.c  | 22 
 src/util/hash_table.h  |  2 ++
 4 files changed, 39 insertions(+), 27 deletions(-)

-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] glsl: Use hash table cloning in copy propagation

2018-03-12 Thread Thomas Helland
2018-03-12 5:23 GMT+01:00 Dave Airlie :
> Did anything ever come of this series, trying some soft fp64 shaders,
> and glsl copy opt is taking 2-3 seconds on the big ones.
>
> Nearly all spent in hash table insertions.
>
> Dave.

I still have the patches around. I also have a new "pointer_map" in the
works that brings us much better cache behaviour. End result is a
substantial speedup in multiple usecases. I don't have a "clone"
function for the new pointer_map, but I believe it should be possible
to make one. I'll see if I can get those patches out this evening,
at least as an RFC

- Thomas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/winsys: replace bo list searchs with a hash table.

2018-01-30 Thread Thomas Helland
2018-01-31 3:38 GMT+01:00 Dave Airlie :
> On 31 January 2018 at 12:27, Dieter Nützel  wrote:
>> Ping!
>
> I'm not sure this was much of a win, and if it makes things worse in some 
> cases,
> then it needs a lot more investigation, so probably consider it dead for now.
>
> Dave.
>

I have a new hash table implementation lying around on my local machine
that is specifically designed for storing pointers. It lowers memory usage,
and has much better cache locality and general performance is nice.
It has reduced the impact of hash tables quite noticeably with the testing
that I've done as of yet. I'll see if I can get it out the door soon,
as it might
come in handy for usecases like this.

>>
>> Am 11.01.2018 04:53, schrieb Dave Airlie:
>>>
>>> From: Dave Airlie 
>>>
>>> This should make the merging of cmd buffers less CPU intensive,
>>> note I said *should* :)
>>> ---
>>>  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 47
>>> ---
>>>  1 file changed, 20 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
>>> b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
>>> index 0ee56f91447..9a39d237ae8 100644
>>> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
>>> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
>>> @@ -33,6 +33,7 @@
>>>  #include "radv_amdgpu_bo.h"
>>>  #include "sid.h"
>>>
>>> +#include "util/hash_table.h"
>>>
>>>  enum {
>>> VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
>>> @@ -584,6 +585,9 @@ static int radv_amdgpu_create_bo_list(struct
>>> radv_amdgpu_winsys *ws,
>>> priorities[0] = 8;
>>> }
>>>
>>> +   struct hash_table *ht = _mesa_hash_table_create(NULL,
>>> _mesa_hash_pointer,
>>> +
>>> _mesa_key_pointer_equal);
>>> +
>>> for (unsigned i = 0; i < count + !!extra_cs; ++i) {
>>> struct radv_amdgpu_cs *cs;
>>>
>>> @@ -595,50 +599,39 @@ static int radv_amdgpu_create_bo_list(struct
>>> radv_amdgpu_winsys *ws,
>>> if (!cs->num_buffers)
>>> continue;
>>>
>>> -   if (unique_bo_count == 0) {
>>> -   memcpy(handles, cs->handles,
>>> cs->num_buffers * sizeof(amdgpu_bo_handle));
>>> -   memcpy(priorities, cs->priorities,
>>> cs->num_buffers * sizeof(uint8_t));
>>> -   unique_bo_count = cs->num_buffers;
>>> -   continue;
>>> -   }
>>> -   int unique_bo_so_far = unique_bo_count;
>>> for (unsigned j = 0; j < cs->num_buffers; ++j) {
>>> -   bool found = false;
>>> -   for (unsigned k = 0; k < unique_bo_so_far;
>>> ++k) {
>>> -   if (handles[k] == cs->handles[j])
>>> {
>>> -   found = true;
>>> -   priorities[k] =
>>> MAX2(priorities[k],
>>> -
>>> cs->priorities[j]);
>>> -   break;
>>> -   }
>>> -   }
>>> -   if (!found) {
>>> +   struct hash_entry *entry =
>>> _mesa_hash_table_search(ht, (void
>>> *)cs->handles[j]);
>>> +   if (!entry) {
>>> +   _mesa_hash_table_insert(ht, (void
>>> *)cs->handles[j], (void
>>> *)(uintptr_t)unique_bo_count);
>>> handles[unique_bo_count] =
>>> cs->handles[j];
>>> priorities[unique_bo_count] =
>>> cs->priorities[j];
>>> ++unique_bo_count;
>>> +   } else {
>>> +   int bo_idx = (uint32_t)(unsigned
>>> long)entry->data;
>>> +   priorities[bo_idx] =
>>> MAX2(priorities[bo_idx],
>>> +
>>> cs->priorities[j]);
>>> }
>>> }
>>> for (unsigned j = 0; j < cs->num_virtual_buffers;
>>> ++j) {
>>> struct radv_amdgpu_winsys_bo *virtual_bo =
>>> radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
>>> for(unsigned k = 0; k <
>>> virtual_bo->bo_count; ++k) {
>>> struct radv_amdgpu_winsys_bo *bo =
>>> virtual_bo->bos[k];
>>> -   bool found = false;
>>> -   for (unsigned m = 0; m <
>>> unique_bo_count; ++m) {
>>> -   if (handles[m] == bo->bo)
>>> {
>>> -   found = true;

Re: [Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc

2018-01-23 Thread Thomas Helland
2018-01-21 23:58 GMT+01:00 Eric Anholt <e...@anholt.net>:
> Thomas Helland <thomashellan...@gmail.com> writes:
>
>> Also, allocate worklist_elem in groups of 20, to reduce the burden of
>> allocation. Do not use rzalloc, as there is no need. This lets us drop
>> the number of calls to ralloc from approximately 10% of all calls to
>> ralloc(130 000 calls), down to a mere 2000 calls to ralloc_array_size.
>> This cuts the runtime of shader-db by 1%, while at the same time
>> reducing the number of stalled cycles, executed cycles, and executed
>> instructions by about 1 % as reported by perf. I did a five-run
>> benchmark pre and post and got a statistical variance less than 0.1% pre
>> and post. This was with i965's ir validation polluting the benchmark, so
>> the numbers are even better in release builds.
>>
>> Performance change as found with perf-diff:
>> 4.74% -0.23%  libc-2.26.so[.] _int_malloc
>> 1.88% -0.21%  libc-2.26.so[.] malloc
>> 2.27% +0.16%  libmesa_dri_drivers.so  [.] match_value.part.7
>> 2.95% -0.12%  libc-2.26.so[.] _int_free
>>   +0.11%  libmesa_dri_drivers.so  [.] worklist_push
>> 1.22% -0.08%  libc-2.26.so[.] malloc_consolidate
>> 0.16% -0.06%  libmesa_dri_drivers.so  [.] mark_live_cb
>> 1.21% +0.06%  libmesa_dri_drivers.so  [.] match_expression.part.6
>> 0.75% -0.05%  libc-2.26.so[.] cfree@GLIBC_2.2.5
>> 0.50% -0.05%  libmesa_dri_drivers.so  [.] ralloc_size
>> 0.57% +0.04%  libmesa_dri_drivers.so  [.] nir_replace_instr
>> 1.29% -0.04%  libmesa_dri_drivers.so  [.] unsafe_free
>
> I'm curious, since a NIR instruction worklist seems like a generally
> useful thing to have:
>
> Could nir_worklist.c keep the implementation of this?
>
> Also, I wonder if it wouldn't be even better to have a u_dynarray of
> instructions in the worklist, with push/pop on the end of the array, and
> a struct set tracking the instructions in the array to avoid
> double-adding.  I actually don't know if that would be better or not, so
> I'd be happy with the worklist management just moved to nir_worklist.c.

I'll look into this to see what I can do. nir_worklist.c at this time has only
a block worklist. This numbers all the blocks, uses a bitset for checking
if the item is present, and uses an array with an index pointing to the
start of the queue of blocks in the buffer.

The same scheme could be easily used for ssa-defs, as these are
also numbered. I actually did this for the VRP pass I wrote years ago.

However, for instructions we do not have a way of numbering them,
so a different scheme would have to be used. A dynarray + set type
of thing, as you're suggesting, might get us where we want.
I'll see what I can come up with.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/1] Use a freelist in nir_opt_dce

2017-12-02 Thread Thomas Helland
2017-12-02 15:49 GMT+01:00 Thomas Helland <thomashellan...@gmail.com>:
> This patch tries to reduce the number of calls to ralloc in nir_opt_dce.
> Especially with scalarized shaders we have a bunch of calls to ralloc
> in this pass, hurting us quite bad. See the commit message for details.
>
> The other large caller to ralloc is nir_alu_instr_create, and it would
> be nice if we could allocate groups at a time also here. I'm not sure
> how we can deal with that though, as it does not allocate the same
> number of items each time. I'm also working on a similar approach for
 ^^^
That should be "number of bytes".

> the symbol table, but that is not quite ready yet.
>
> Thomas Helland (1):
>   nir: Use a freelist in nir_opt_dce to avoid spamming ralloc
>
>  src/compiler/nir/nir_opt_dce.c | 47 
> --
>  1 file changed, 32 insertions(+), 15 deletions(-)
>
> --
> 2.15.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/1] Use a freelist in nir_opt_dce

2017-12-02 Thread Thomas Helland
This patch tries to reduce the number of calls to ralloc in nir_opt_dce.
Especially with scalarized shaders we have a bunch of calls to ralloc
in this pass, hurting us quite bad. See the commit message for details.

The other large caller to ralloc is nir_alu_instr_create, and it would
be nice if we could allocate groups at a time also here. I'm not sure
how we can deal with that though, as it does not allocate the same
number of items each time. I'm also working on a similar approach for
the symbol table, but that is not quite ready yet.

Thomas Helland (1):
  nir: Use a freelist in nir_opt_dce to avoid spamming ralloc

 src/compiler/nir/nir_opt_dce.c | 47 --
 1 file changed, 32 insertions(+), 15 deletions(-)

-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc

2017-12-02 Thread Thomas Helland
Also, allocate worklist_elem in groups of 20, to reduce the burden of 
allocation. Do not use rzalloc, as there is no need. This lets us drop 
the number of calls to ralloc from approximately 10% of all calls to 
ralloc (130 000 calls), down to a mere 2000 calls to ralloc_array_size. 
This cuts the runtime of shader-db by 1%, while at the same time 
reducing the number of stalled cycles, executed cycles, and executed 
instructions by about 1 % as reported by perf. I did a five-run 
benchmark pre and post and got a statistical variance less than 0.1% pre 
and post. This was with i965's ir validation polluting the benchmark, so 
the numbers are even better in release builds.

Performance change as found with perf-diff:
4.74% -0.23%  libc-2.26.so[.] _int_malloc
1.88% -0.21%  libc-2.26.so[.] malloc
2.27% +0.16%  libmesa_dri_drivers.so  [.] match_value.part.7
2.95% -0.12%  libc-2.26.so[.] _int_free
  +0.11%  libmesa_dri_drivers.so  [.] worklist_push
1.22% -0.08%  libc-2.26.so[.] malloc_consolidate
0.16% -0.06%  libmesa_dri_drivers.so  [.] mark_live_cb
1.21% +0.06%  libmesa_dri_drivers.so  [.] match_expression.part.6
0.75% -0.05%  libc-2.26.so[.] cfree@GLIBC_2.2.5
0.50% -0.05%  libmesa_dri_drivers.so  [.] ralloc_size
0.57% +0.04%  libmesa_dri_drivers.so  [.] nir_replace_instr
1.29% -0.04%  libmesa_dri_drivers.so  [.] unsafe_free
---
 src/compiler/nir/nir_opt_dce.c | 47 --
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
index 5cefba3a72..f9285fe4ac 100644
--- a/src/compiler/nir/nir_opt_dce.c
+++ b/src/compiler/nir/nir_opt_dce.c
@@ -29,32 +29,46 @@
 
 /* SSA-based mark-and-sweep dead code elimination */
 
+typedef struct {
+   struct exec_list *worklist;
+   struct exec_list *free_nodes;
+} worklist;
+
 typedef struct {
struct exec_node node;
nir_instr *instr;
 } worklist_elem;
 
 static void
-worklist_push(struct exec_list *worklist, nir_instr *instr)
+worklist_push(worklist *worklist, nir_instr *instr)
 {
-   worklist_elem *elem = ralloc(worklist, worklist_elem);
+   if (exec_list_is_empty(worklist->free_nodes)) {
+  worklist_elem *elements = ralloc_array(worklist, worklist_elem, 20);
+  for (int i = 0; i < 20; i++)
+ exec_list_push_tail(worklist->free_nodes, &elements[i].node);
+   }
+
+   struct exec_node *node = exec_list_pop_head(worklist->free_nodes);
+   worklist_elem *elem = exec_node_data(worklist_elem, node, node);
elem->instr = instr;
instr->pass_flags = 1;
-   exec_list_push_tail(worklist, &elem->node);
+   exec_list_push_tail(worklist->worklist, &elem->node);
 }
 
 static nir_instr *
-worklist_pop(struct exec_list *worklist)
+worklist_pop(worklist *worklist)
 {
-   struct exec_node *node = exec_list_pop_head(worklist);
+   
+   struct exec_node *node = exec_list_pop_head(worklist->worklist);
worklist_elem *elem = exec_node_data(worklist_elem, node, node);
+   exec_list_push_head(worklist->free_nodes, node);
return elem->instr;
 }
 
 static bool
 mark_live_cb(nir_src *src, void *_state)
 {
-   struct exec_list *worklist = (struct exec_list *) _state;
+   worklist *worklist = _state;
 
if (src->is_ssa && !src->ssa->parent_instr->pass_flags) {
   worklist_push(worklist, src->ssa->parent_instr);
@@ -64,7 +78,7 @@ mark_live_cb(nir_src *src, void *_state)
 }
 
 static void
-init_instr(nir_instr *instr, struct exec_list *worklist)
+init_instr(nir_instr *instr, worklist *worklist)
 {
nir_alu_instr *alu_instr;
nir_intrinsic_instr *intrin_instr;
@@ -113,7 +127,7 @@ init_instr(nir_instr *instr, struct exec_list *worklist)
 }
 
 static bool
-init_block(nir_block *block, struct exec_list *worklist)
+init_block(nir_block *block, worklist *worklist)
 {
nir_foreach_instr(instr, block)
   init_instr(instr, worklist);
@@ -131,19 +145,22 @@ init_block(nir_block *block, struct exec_list *worklist)
 static bool
 nir_opt_dce_impl(nir_function_impl *impl)
 {
-   struct exec_list *worklist = rzalloc(NULL, struct exec_list);
-   exec_list_make_empty(worklist);
+   worklist *wl = ralloc(NULL, worklist);
+   wl->free_nodes = ralloc(wl, struct exec_list);
+   wl->worklist = ralloc(wl, struct exec_list);
+   exec_list_make_empty(wl->free_nodes);
+   exec_list_make_empty(wl->worklist);
 
nir_foreach_block(block, impl) {
-  init_block(block, worklist);
+  init_block(block, wl);
}
 
-   while (!exec_list_is_empty(worklist)) {
-  nir_instr *instr = worklist_pop(worklist);
-  nir_foreach_src(instr, mark_live_cb, worklist);
+   while (!exec_list_is_empty(wl->worklist)) {
+  nir_instr *instr = worklist_pop(wl);
+  nir_foreach_src(instr, mark_live_cb, wl);
}
 
-   ralloc_free(worklist);
+   ralloc_free(wl);
 
bool progress = false;
 
-- 
2.15.0

___
mesa-dev mailing list

Re: [Mesa-dev] [PATCH 10/12] glsl: Minor cleanups after previous commit

2017-11-04 Thread Thomas Helland
Patches 6-10 are:

Reviewed-by: Thomas Helland <thomashellan...@gmail.com>

2017-11-02 21:25 GMT+01:00 Ian Romanick <i...@freedesktop.org>:
> From: Ian Romanick <ian.d.roman...@intel.com>
>
> I think it's more clear to only call emit_access once.  The only
> difference between the two calls is the value of size_mul used for the
> offset parameter... but you really have to look at it to be sure.
>
> The s/is_64bit/is_double/ change is because there are no int64_t or
> uint64_t matrix types.
>
> Signed-off-by: Ian Romanick <ian.d.roman...@intel.com>
> ---
>  src/compiler/glsl/lower_buffer_access.cpp | 29 +++--
>  1 file changed, 11 insertions(+), 18 deletions(-)
>
> diff --git a/src/compiler/glsl/lower_buffer_access.cpp 
> b/src/compiler/glsl/lower_buffer_access.cpp
> index 056fd26..fa6e5f5 100644
> --- a/src/compiler/glsl/lower_buffer_access.cpp
> +++ b/src/compiler/glsl/lower_buffer_access.cpp
> @@ -111,24 +111,17 @@ lower_buffer_access::emit_access(void *mem_ctx,
>   ir_dereference *col_deref =
>  new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), 
> col);
>
> - if (row_major) {
> -/* For a row-major matrix, the next column starts at the next
> - * element.
> - */
> -int size_mul = deref->type->is_64bit() ? 8 : 4;
> -emit_access(mem_ctx, is_write, col_deref, base_offset,
> -deref_offset + i * size_mul,
> -row_major, deref->type, packing,
> -
> writemask_for_size(col_deref->type->vector_elements));
> - } else {
> -const int size_mul =
> -   link_calculate_matrix_stride(deref->type, row_major, packing);
> -
> -emit_access(mem_ctx, is_write, col_deref, base_offset,
> -deref_offset + i * size_mul,
> -row_major, deref->type, packing,
> -
> writemask_for_size(col_deref->type->vector_elements));
> - }
> + /* For a row-major matrix, the next column starts at the next
> +  * element.  Otherwise it is offset by the matrix stride.
> +  */
> + const unsigned size_mul = row_major
> +? (deref->type->is_double() ? 8 : 4)
> +: link_calculate_matrix_stride(deref->type, row_major, packing);
> +
> + emit_access(mem_ctx, is_write, col_deref, base_offset,
> + deref_offset + i * size_mul,
> + row_major, deref->type, packing,
> + writemask_for_size(col_deref->type->vector_elements));
>}
>return;
> }
> --
> 2.9.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/12] glsl: Combine nop-swizzle optimization with swizzle-swizzle optimization

2017-11-02 Thread Thomas Helland
Changes all look really good, and some performance numbers
I have for a similar patch I've written show a marginal benefit
in compiler runtime performance (perf-stat -> cycles executed)
for patches 4 and 5.

Reviewed-by: Thomas Helland <thomashellan...@gmail.com>

2. nov. 2017 21.26 skrev "Ian Romanick" :

From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
 src/compiler/Makefile.sources  |  3 +-
 src/compiler/glsl/glsl_parser_extras.cpp   |  3 +-
 src/compiler/glsl/ir_optimization.h|  3 +-
 src/compiler/glsl/meson.build  |  3 +-
 .../glsl/{opt_noop_swizzle.cpp => opt_swizzle.cpp} | 56 ++---
 src/compiler/glsl/opt_swizzle_swizzle.cpp  | 96
--
 src/compiler/glsl/test_optpass.cpp |  6 +-
 7 files changed, 52 insertions(+), 118 deletions(-)
 rename src/compiler/glsl/{opt_noop_swizzle.cpp => opt_swizzle.cpp} (56%)
 delete mode 100644 src/compiler/glsl/opt_swizzle_swizzle.cpp

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index b80468c..60814e2 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -130,11 +130,10 @@ LIBGLSL_FILES = \
glsl/opt_function_inlining.cpp \
glsl/opt_if_simplification.cpp \
glsl/opt_minmax.cpp \
-   glsl/opt_noop_swizzle.cpp \
glsl/opt_rebalance_tree.cpp \
glsl/opt_redundant_jumps.cpp \
glsl/opt_structure_splitting.cpp \
-   glsl/opt_swizzle_swizzle.cpp \
+   glsl/opt_swizzle.cpp \
glsl/opt_tree_grafting.cpp \
glsl/opt_vectorize.cpp \
glsl/program.h \
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp
b/src/compiler/glsl/glsl_parser_extras.cpp
index 822301a..5982173 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2226,8 +2226,7 @@ do_common_optimization(exec_list *ir, bool linked,
options->EmitNoCont, options->EmitNoLoops);
OPT(do_vec_index_to_swizzle, ir);
OPT(lower_vector_insert, ir, false);
-   OPT(do_swizzle_swizzle, ir);
-   OPT(do_noop_swizzle, ir);
+   OPT(optimize_swizzles, ir);

OPT(optimize_split_arrays, ir, linked);
OPT(optimize_redundant_jumps, ir);
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_
optimization.h
index f44ddcb..2b8c195 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -123,9 +123,8 @@ bool lower_if_to_cond_assign(gl_shader_stage stage,
exec_list *instructions,
  unsigned max_depth = 0, unsigned
min_branch_cost = 0);
 bool do_mat_op_to_vec(exec_list *instructions);
 bool do_minmax_prune(exec_list *instructions);
-bool do_noop_swizzle(exec_list *instructions);
 bool do_structure_splitting(exec_list *instructions);
-bool do_swizzle_swizzle(exec_list *instructions);
+bool optimize_swizzles(exec_list *instructions);
 bool do_vectorize(exec_list *instructions);
 bool do_tree_grafting(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
diff --git a/src/compiler/glsl/meson.build b/src/compiler/glsl/meson.build
index 76fcafb..6284d0c 100644
--- a/src/compiler/glsl/meson.build
+++ b/src/compiler/glsl/meson.build
@@ -169,11 +169,10 @@ files_libglsl = files(
   'opt_function_inlining.cpp',
   'opt_if_simplification.cpp',
   'opt_minmax.cpp',
-  'opt_noop_swizzle.cpp',
   'opt_rebalance_tree.cpp',
   'opt_redundant_jumps.cpp',
   'opt_structure_splitting.cpp',
-  'opt_swizzle_swizzle.cpp',
+  'opt_swizzle.cpp',
   'opt_tree_grafting.cpp',
   'opt_vectorize.cpp',
   'program.h',
diff --git a/src/compiler/glsl/opt_noop_swizzle.cpp b/src/compiler/glsl/opt_
swizzle.cpp
similarity index 56%
rename from src/compiler/glsl/opt_noop_swizzle.cpp
rename to src/compiler/glsl/opt_swizzle.cpp
index 41890ab..2fbe362 100644
--- a/src/compiler/glsl/opt_noop_swizzle.cpp
+++ b/src/compiler/glsl/opt_swizzle.cpp
@@ -22,11 +22,14 @@
  */

 /**
- * \file opt_noop_swizzle.cpp
+ * \file opt_swizzle.cpp
+ * Optimize swizzle operations.
  *
- * If a swizzle doesn't change the order or count of components, then
- * remove the swizzle so that other optimization passes see the value
- * behind it.
+ * First, compact a sequence of swizzled swizzles into a single swizzle.
+ *
+ * If the final resulting swizzle doesn't change the order or count of
+ * components, then remove the swizzle so that other optimization passes
see
+ * the value behind it.
  */

 #include "ir.h"
@@ -36,9 +39,9 @@

 namespace {

-class ir_noop_swizzle_visitor : public ir_rvalue_visitor {
+class ir_opt_swizzle_visitor : public ir_rvalue_visitor {
 public:
-   ir_noop_swizzle_visitor()
+   ir_opt_swizzle_visitor()
{
   this->progress = false;
}
@@ -50,13 +53,46 @@ public:
 } /* unnamed namespace */

 void
-ir_noop_swizzle_visitor::handle_rvalue(ir_rvalue **rvalue)

Re: [Mesa-dev] [PATCH 0/6] Prehash all the things

2017-10-10 Thread Thomas Helland
Hi!

Thanks for putting up with the long wait =)
I revisited this not too long ago, and found that with the new
pointer hashing function the benefits of this series are zero to
negative. I've reduced it to only the instruction set and
the string_to_uint_map patch, but the results are not convincing.
I suspect we are seeing cache miss vs hashing tradeoffs.
So I've basically put it to rest for now. Might give it another
go sometime, but I think right now the effort is better spent
elsewhere in the codebase.

Greetings,
Thomas

2017-10-09 14:02 GMT+02:00 Dieter Nützel <die...@nuetzel-hh.de>:
> Hello Thomas,
>
> now, that you have write commit 'only' this one is missing.
> Maybe you have time for this.
>
> Latest version do not apply any longer.
>
> Wende an: util: Avoid computing hash twice in string_to_uint_map
> error: src/util/string_to_uint_map.h ist nicht im Index
> Anwendung des Patches fehlgeschlagen bei 0006 util: Avoid computing hash
> twice in string_to_uint_map
>
> Greetings,
> Dieter
>
>
> Am 19.06.2017 18:09, schrieb Dieter Nützel:
>>
>> Ping!
>>
>> Any news, reviews --- anyone?
>>
>> I'm running this all day without a hitch.
>>
>> Cheers,
>> Dieter
>>
>> Am 23.05.2017 05:40, schrieb Dieter Nützel:
>>>
>>> For the series:
>>>
>>> Tested-by: Dieter Nützel <die...@nuetzel-hh.de>
>>>
>>> on radeonsi/RX580
>>>
>>> Unigine_Heaven-4.0, Unigine_Valley-1.0, Unigine_Superposition-1.0,
>>> LS2015 (Wine-staging), Mesa-demos (objviewer)
>>>
>>> Dieter
>>>
>>> Am 22.05.2017 20:55, schrieb Thomas Helland:
>>>>
>>>> While this doesn't prehash all the things, it does switch quite a lot
>>>> of places from doing a search and then a subsequent insert to first
>>>> hash the key, and then use this hash when searching / inserting.
>>>> While our new pointer hashing function remedied much of our overhead
>>>> hashing pointers, there is still more to gain here. This cuts executed
>>>> instructions / task-clock by about 0.5% on a shader-db run on my i965
>>>> running machine. While that's not a lot, it is still a nice little
>>>> improvement on the way to less overhead. The changes should also be
>>>> fairly trivial, so it's not much of a burden.
>>>>
>>>> Thomas Helland (6):
>>>>   glsl: Prehash in refcount hash table to reduce hashing
>>>>   nir: Prehash in instr_set to avoid hashing twice
>>>>   glsl: Prehash in constant propagation
>>>>   glsl: Prehash in constant variable pass to avoid hashing twice
>>>>   glsl: Prehash to avoid computing the hash twice
>>>>   util: Avoid computing hash twice in string_to_uint_map
>>>>
>>>>  src/compiler/glsl/ir_variable_refcount.cpp  | 7 +--
>>>>  src/compiler/glsl/opt_constant_propagation.cpp  | 8 +---
>>>>  src/compiler/glsl/opt_constant_variable.cpp | 6 --
>>>>  src/compiler/glsl/opt_copy_propagation_elements.cpp | 7 +--
>>>>  src/compiler/nir/nir_instr_set.c| 7 +--
>>>>  src/util/string_to_uint_map.h   | 9 ++---
>>>>  6 files changed, 30 insertions(+), 14 deletions(-)
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util: fix in-class initialization of static member

2017-09-27 Thread Thomas Helland
string_buffer_test.cpp:43: error: ISO C++ forbids initialization of
member ‘str1’
string_buffer_test.cpp:43: error: making ‘str1’ static
string_buffer_test.cpp:43: error: invalid in-class initialization of
static data member of non-integral type ‘const char*’

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103002
---
 src/util/tests/string_buffer/string_buffer_test.cpp | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/util/tests/string_buffer/string_buffer_test.cpp 
b/src/util/tests/string_buffer/string_buffer_test.cpp
index c3d43cb67b..545f607fad 100644
--- a/src/util/tests/string_buffer/string_buffer_test.cpp
+++ b/src/util/tests/string_buffer/string_buffer_test.cpp
@@ -40,9 +40,9 @@ class string_buffer : public ::testing::Test {
 public:
 
struct _mesa_string_buffer *buf;
-   const char *str1 = "test1";
-   const char *str2 = "test2";
-   const char *str3 = "test1test2";
+   const char *str1;
+   const char *str2;
+   const char *str3;
char str4[80];
char str5[40];
 
@@ -53,6 +53,9 @@ public:
 void
 string_buffer::SetUp()
 {
+   str1 = "test1";
+   str2 = "test2";
+   str3 = "test1test2";
buf = _mesa_string_buffer_create(NULL, INITIAL_BUF_SIZE);
 }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glcpp: Avoid unnecessary call to strlen

2017-09-26 Thread Thomas Helland
I've now pushed the series, so feel free to rebase =)

2017-09-22 16:10 GMT+02:00 Ian Romanick <i...@freedesktop.org>:
> This patch is
>
> Reviewed-by: Ian Romanick <ian.d.roman...@intel.com>
>
> I have a couple patches that go on top of this particular patch, and I'd
> rather rebase before I send them out for review. :)
>
> On 09/14/2017 03:39 PM, Thomas Helland wrote:
>> Length of the token was already calculated by flex and stored in yyleng,
>> no need to implicitly call strlen() via linear_strdup().
>>
>> Reviewed-by: Nicolai Hähnle 
>> Reviewed-by: Timothy Arceri <tarc...@itsqueeze.com>
>>
>> V2: Also convert this pattern in glsl_lexer.ll
>>
>> V3: Remove a misplaced comment
>>
>> V4: Use a temporary char to avoid type change
>> Remove bogus +1 on length check of identifier
>> ---
>>  src/compiler/glsl/glcpp/glcpp-lex.l |  9 -
>>  src/compiler/glsl/glsl_lexer.ll | 39 
>> +
>>  2 files changed, 39 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l 
>> b/src/compiler/glsl/glcpp/glcpp-lex.l
>> index 381b97364a..9cfcc12022 100644
>> --- a/src/compiler/glsl/glcpp/glcpp-lex.l
>> +++ b/src/compiler/glsl/glcpp/glcpp-lex.l
>> @@ -101,7 +101,14 @@ void glcpp_set_column (int  column_no , yyscan_t 
>> yyscanner);
>>  #define RETURN_STRING_TOKEN(token)   \
>>   do {\
>>   if (! parser->skipping) {   \
>> - yylval->str = linear_strdup(yyextra->linalloc, 
>> yytext); \
>> + /* We're not doing linear_strdup here, to avoid \
>> +  * an implicit call on strlen() for the length  \
>> +  * of the string, as this is already found by   \
>> +  * flex and stored in yyleng */ \
>> + void *mem_ctx = yyextra->linalloc;  \
>> + yylval->str = linear_alloc_child(mem_ctx,   \
>> +  yyleng + 1);   \
>> + memcpy(yylval->str, yytext, yyleng + 1);\
>>   RETURN_TOKEN_NEVER_SKIP (token);\
>>   }   \
>>   } while(0)
>> diff --git a/src/compiler/glsl/glsl_lexer.ll 
>> b/src/compiler/glsl/glsl_lexer.ll
>> index 7c41455d98..56519bf92d 100644
>> --- a/src/compiler/glsl/glsl_lexer.ll
>> +++ b/src/compiler/glsl/glsl_lexer.ll
>> @@ -81,8 +81,13 @@ static int classify_identifier(struct 
>> _mesa_glsl_parse_state *, const char *);
>> "illegal use of reserved word `%s'", yytext); \
>>return ERROR_TOK;  \
>>} else {  
>>  \
>> -  void *mem_ctx = yyextra->linalloc;
>>  \
>> -  yylval->identifier = linear_strdup(mem_ctx, yytext);   \
>> +  /* We're not doing linear_strdup here, to avoid an implicit\
>> +   * call on strlen() for the length of the string, as this is   \
>> +   * already found by flex and stored in yyleng */   \
>> +  void *mem_ctx = yyextra->linalloc; \
>> + char *id = (char *) linear_alloc_child(mem_ctx, yyleng + 1);   \
>> + memcpy(id, yytext, yyleng + 1);\
>> + yylval->identifier = id;   \
>>return classify_identifier(yyextra, yytext);   \
>>} 
>>  \
>> } while (0)
>> @@ -261,8 +266,14 @@ HASH ^{SPC}#{SPC}
>>  [ \t\r]* { }
>>  :return COLON;
>>  [_a-zA-Z][_a-zA-Z0-9]*   {
>> -void *mem_ctx = yyextra->linalloc;
>> -yylval->identifier = linear_strdup(mem_ctx, 
>> yytext);
>> +/* We're not doing linear_strdup here, to 
>> avoid an implicit call
>> + * on strlen() for the length of the 
>> string, as this is already
>> +  

Re: [Mesa-dev] [PATCH 07/22] glsl: Fix coding standards issues in lower_variable_index_to_cond_assign

2017-09-22 Thread Thomas Helland
2017-09-21 16:34 GMT+02:00 Ian Romanick <i...@freedesktop.org>:
> From: "\"Ian Romanick\"" <i...@freedesktop.org>
>

^ Something weird going on here? Apart from that, patches 1 - 7 are:

Reviewed-by: Thomas Helland <thomashellan...@gmail.com>

I have no comments on the regression from patch six.
Someone other than me should probably weigh in on that.

> From: Ian Romanick <ian.d.roman...@intel.com>
>
> Mostly tabs-before-spaces, but there was some other trivium too.
>
> Signed-off-by: Ian Romanick <ian.d.roman...@intel.com>
> ---
>  .../lower_variable_index_to_cond_assign.cpp   | 154 +-
>  1 file changed, 76 insertions(+), 78 deletions(-)
>
> diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp 
> b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
> index dd49272..9e2dd831 100644
> --- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
> +++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
> @@ -71,12 +71,13 @@
>   */
>  ir_variable *
>  compare_index_block(exec_list *instructions, ir_variable *index,
> -   unsigned base, unsigned components, void *mem_ctx)
> +unsigned base, unsigned components, void *mem_ctx)
>  {
> ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index);
>
> assert(index->type->is_scalar());
> -   assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type 
> == GLSL_TYPE_UINT);
> +   assert(index->type->base_type == GLSL_TYPE_INT ||
> +  index->type->base_type == GLSL_TYPE_UINT);
> assert(components >= 1 && components <= 4);
>
> if (components > 1) {
> @@ -94,19 +95,18 @@ compare_index_block(exec_list *instructions, ir_variable 
> *index,
> test_indices_data.i[3] = base + 3;
>
> ir_constant *const test_indices =
> -  new(mem_ctx) ir_constant(broadcast_index->type,
> -  &test_indices_data);
> +  new(mem_ctx) ir_constant(broadcast_index->type, &test_indices_data);
>
> ir_rvalue *const condition_val =
>new(mem_ctx) ir_expression(ir_binop_equal,
> -glsl_type::bvec(components),
> -broadcast_index,
> -test_indices);
> + glsl_type::bvec(components),
> + broadcast_index,
> + test_indices);
>
> ir_variable *const condition =
>new(mem_ctx) ir_variable(condition_val->type,
> -  "dereference_condition",
> -  ir_var_temporary);
> +   "dereference_condition",
> +   ir_var_temporary);
> instructions->push_tail(condition);
>
> ir_rvalue *const cond_deref =
> @@ -133,7 +133,7 @@ class deref_replacer : public ir_rvalue_visitor {
>  public:
> deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value)
>: variable_to_replace(variable_to_replace), value(value),
> -   progress(false)
> +progress(false)
> {
>assert(this->variable_to_replace != NULL);
>assert(this->value != NULL);
> @@ -143,9 +143,9 @@ public:
> {
>ir_dereference_variable *const dv = 
> (*rvalue)->as_dereference_variable();
>
> -  if ((dv != NULL) && (dv->var == this->variable_to_replace)) {
> -this->progress = true;
> -*rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
> +  if (dv != NULL && dv->var == this->variable_to_replace) {
> + this->progress = true;
> + *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
>}
> }
>
> @@ -167,10 +167,10 @@ public:
>
> virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
> {
> -  if (is_array_or_matrix(ir->array)
> - && (ir->array_index->as_constant() == NULL)) {
> -this->deref = ir;
> -return visit_stop;
> +  if (is_array_or_matrix(ir->array) &&
> +  ir->array_index->as_constant() == NULL) {
> + this->deref = ir;
> + return visit_stop;
>}
>
>return visit_continue;
> @@ -222,8 +222,8 @@ struct assignment_generator
> */
>ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
>ir_assignment *const assignment = (is_write)
> -? new(mem_ctx) ir_assignment(element, variable, condition, 
> wr

Re: [Mesa-dev] [PATCH 2/5] util: Add tests for the string buffer

2017-09-21 Thread Thomas Helland
Fixed the missing newline at the end of this cpp file locally.
This is the only patch left in the series without an RB.
If there are no objections, I plan on pushing the series once this patch gets one.
Would someone mind having a look at it?

2017-09-11 22:21 GMT+02:00 Thomas Helland <thomashellan...@gmail.com>:
> More tests could probably be added, but this should cover
> concatenation, resizing, clearing, formatted printing,
> and checking the length, so it should be quite complete.
>
> V2: Address review feedback from Timothy, plus fixes
>- Use a large enough char array
>- Actually test the formatted appending
>- Test that clear function resets string length
>
> V3: Port to gtest
>
> V4: Fix test makefile
> Fix copyright header
> Fix missing extern C
> Use more appropriate name for C-file
> Add tests for append_char
> ---
>  configure.ac   |   1 +
>  src/util/Makefile.am   |   5 +-
>  src/util/tests/string_buffer/Makefile.am   |  40 +++
>  .../tests/string_buffer/string_buffer_test.cpp | 119 
> +
>  4 files changed, 164 insertions(+), 1 deletion(-)
>  create mode 100644 src/util/tests/string_buffer/Makefile.am
>  create mode 100644 src/util/tests/string_buffer/string_buffer_test.cpp
>
> diff --git a/configure.ac b/configure.ac
> index d0d4c0dfd1..20727c7bb4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2924,6 +2924,7 @@ AC_CONFIG_FILES([Makefile
>   src/mesa/state_tracker/tests/Makefile
>   src/util/Makefile
>   src/util/tests/hash_table/Makefile
> + src/util/tests/string_buffer/Makefile
>   src/util/xmlpool/Makefile
>   src/vulkan/Makefile])
>
> diff --git a/src/util/Makefile.am b/src/util/Makefile.am
> index 4512dc99d5..2b47143ad7 100644
> --- a/src/util/Makefile.am
> +++ b/src/util/Makefile.am
> @@ -19,7 +19,10 @@
>  # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
>  # IN THE SOFTWARE.
>
> -SUBDIRS = xmlpool . tests/hash_table
> +SUBDIRS = . \
> +   xmlpool \
> +   tests/hash_table \
> +   tests/string_buffer
>
>  include Makefile.sources
>
> diff --git a/src/util/tests/string_buffer/Makefile.am 
> b/src/util/tests/string_buffer/Makefile.am
> new file mode 100644
> index 00..bd04d86349
> --- /dev/null
> +++ b/src/util/tests/string_buffer/Makefile.am
> @@ -0,0 +1,40 @@
> +# Copyright © 2017 Thomas Helland
> +#
> +#  Permission is hereby granted, free of charge, to any person obtaining a
> +#  copy of this software and associated documentation files (the "Software"),
> +#  to deal in the Software without restriction, including without limitation
> +#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
> +#  and/or sell copies of the Software, and to permit persons to whom the
> +#  Software is furnished to do so, subject to the following conditions:
> +#
> +#  The above copyright notice and this permission notice (including the next
> +#  paragraph) shall be included in all copies or substantial portions of the
> +#  Software.
> +#
> +#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> +#  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> +#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> +#  IN THE SOFTWARE.
> +
> +AM_CPPFLAGS = \
> +   -I$(top_srcdir)/src \
> +   -I$(top_srcdir)/include \
> +   -I$(top_srcdir)/src/gtest/include \
> +   $(PTHREAD_CFLAGS) \
> +   $(DEFINES)
> +
> +TESTS = string_buffer_test
> +
> +check_PROGRAMS = $(TESTS)
> +
> +string_buffer_test_SOURCES = \
> +   string_buffer_test.cpp
> +
> +string_buffer_test_LDADD = \
> +   $(top_builddir)/src/gtest/libgtest.la \
> +   $(top_builddir)/src/util/libmesautil.la \
> +   $(PTHREAD_LIBS) \
> +   $(DLOPEN_LIBS)
> diff --git a/src/util/tests/string_buffer/string_buffer_test.cpp 
> b/src/util/tests/string_buffer/string_buffer_test.cpp
> new file mode 100644
> index 00..e80ee8b135
> --- /dev/null
> +++ b/src/util/tests/string_buffer/string_buffer_test.cpp
> @@ -0,0 +1,119 @@
> +/*
> + * Copyright © 2017 Thomas Helland
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this

Re: [Mesa-dev] [PATCH 2/2] glsl: merge loop_controls.cpp with loop_unroll.cpp

2017-09-20 Thread Thomas Helland
I've only skimmed this, but it looks trivial and correct.
This patch series is:
Reviewed-by: Thomas Helland <thomashellan...@gmail.com>

I like the idea of getting rid of some of the walks over the IR,
and have a set of patches sitting locally doing just that.
I think I got most of them reviewed, but then it stalled.
I've just applied for an account though, so then I won't
have to bother others to push my patches for me =)

2017-09-19 4:14 GMT+02:00 Timothy Arceri <tarc...@itsqueeze.com>:
> Having this separate just makes the code harder to follow, and
> requires an extra walk of the IR.
> ---
>  src/compiler/Makefile.sources|   1 -
>  src/compiler/glsl/glsl_parser_extras.cpp |   1 -
>  src/compiler/glsl/loop_analysis.h|  16 -
>  src/compiler/glsl/loop_controls.cpp  | 108 
> ---
>  src/compiler/glsl/loop_unroll.cpp|  36 ++-
>  5 files changed, 34 insertions(+), 128 deletions(-)
>  delete mode 100644 src/compiler/glsl/loop_controls.cpp
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index e03a3f8738c..7146d3db367 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -80,7 +80,6 @@ LIBGLSL_FILES = \
> glsl/list.h \
> glsl/loop_analysis.cpp \
> glsl/loop_analysis.h \
> -   glsl/loop_controls.cpp \
> glsl/loop_unroll.cpp \
> glsl/lower_blend_equation_advanced.cpp \
> glsl/lower_buffer_access.cpp \
> diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
> b/src/compiler/glsl/glsl_parser_extras.cpp
> index fc56f21a5f0..98151fdb4a4 100644
> --- a/src/compiler/glsl/glsl_parser_extras.cpp
> +++ b/src/compiler/glsl/glsl_parser_extras.cpp
> @@ -2218,7 +2218,6 @@ do_common_optimization(exec_list *ir, bool linked,
> if (options->MaxUnrollIterations) {
>loop_state *ls = analyze_loop_variables(ir);
>if (ls->loop_found) {
> - OPT(set_loop_controls, ir, ls);
>   OPT(unroll_loops, ir, ls, options);
>}
>delete ls;
> diff --git a/src/compiler/glsl/loop_analysis.h 
> b/src/compiler/glsl/loop_analysis.h
> index e2eff9dbaed..8f824046945 100644
> --- a/src/compiler/glsl/loop_analysis.h
> +++ b/src/compiler/glsl/loop_analysis.h
> @@ -35,22 +35,6 @@ extern class loop_state *
>  analyze_loop_variables(exec_list *instructions);
>
>
> -/**
> - * Fill in loop control fields
> - *
> - * Based on analysis of loop variables, this function tries to remove
> - * redundant sequences in the loop of the form
> - *
> - *  (if (expression bool ...) (break))
> - *
> - * For example, if it is provable that one loop exit condition will
> - * always be satisfied before another, the unnecessary exit condition will be
> - * removed.
> - */
> -extern bool
> -set_loop_controls(exec_list *instructions, loop_state *ls);
> -
> -
>  extern bool
>  unroll_loops(exec_list *instructions, loop_state *ls,
>   const struct gl_shader_compiler_options *options);
> diff --git a/src/compiler/glsl/loop_controls.cpp 
> b/src/compiler/glsl/loop_controls.cpp
> deleted file mode 100644
> index ad4aa189411..000
> --- a/src/compiler/glsl/loop_controls.cpp
> +++ /dev/null
> @@ -1,108 +0,0 @@
> -/*
> - * Copyright © 2010 Intel Corporation
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice (including the next
> - * paragraph) shall be included in all copies or substantial portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> - * DEALINGS IN THE SOFTWARE.
> - */
> -
> -#include 
> -#include "main/compiler.h"
> -#include "compiler/glsl_types.h"
> -#include "loop_analysis.h"
> -#include "ir_hierarchical_visitor.h"
>

Re: [Mesa-dev] [PATCH mesa] docs/submittingpatches: add 'test each commit' instructions

2017-09-15 Thread Thomas Helland
This patch is:

Reviewed-by: Thomas Helland <thomashellan...@gmail.com>

2017-09-15 19:09 GMT+02:00 Eric Engestrom <eric.engest...@imgtec.com>:
> Signed-off-by: Eric Engestrom <eric.engest...@imgtec.com>
> ---
>  docs/submittingpatches.html | 12 
>  1 file changed, 12 insertions(+)
>
> diff --git a/docs/submittingpatches.html b/docs/submittingpatches.html
> index ecf9590a95..0581391b29 100644
> --- a/docs/submittingpatches.html
> +++ b/docs/submittingpatches.html
> @@ -151,6 +151,18 @@ Testing Patches
>  to check for regressions.
>  
>
> +
> +As mentioned at the beginning, patches should be bisectable.
> +A good way to test this is to make use of the `git rebase` command,
> +to run your tests on each commit. Assuming your branch is based off
> +origin/master, you can run:
> +
> +$ git rebase --interactive --exec "make check" origin/master
> +
> +replacing "make check" with whatever other test you want to
> +run.
> +
> +
>
>  Mailing Patches
>
> --
> Cheers,
>   Eric
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util: Add a string buffer implementation

2017-09-14 Thread Thomas Helland
Based on Vladislav Egorov's work on the preprocessor, but split
out to a util functionality that should be universal. Setup, teardown,
memory handling and general layout is modeled around the hash_table
and the set, to make it familiar for everyone.

A notable change is that this implementation is always null terminated.
The rationale is that it will be less error-prone, as one might
access the buffer directly, thereby reading a non-terminated string.
Also, vsnprintf and friends print the null-terminator.

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

V2: Address review feedback from Timothy and Grazvydas
   - Fix MINGW preprocessor check
   - Changed len from uint to int
   - Make string argument const in append function
   - Move to header and inline append function
   - Add crimp_to_fit function for resizing buffer

V3: Move include of ralloc to string_buffer.h

V4: Use u_string.h for a cross-platform working vsnprintf

V5: Remember to cast to char * in crimp function

V6: Address review feedback from Nicolai
   - Handle !str->buf in buffer_create
   - Ensure va_end is always called in buffer_append_all
   - Add overflow check in buffer_append_len
   - Do not expose buffer_space_left, just remove it
   - Clarify why a loop is used in vprintf, change to for-loop
   - Add a va_copy to buffer_vprintf to fix failure to append arguments
 when having to resize the buffer for vsnprintf.

V7: Address more review feedback from Nicolai
   - Add missing va_end corresponding to va_copy
   - Error check failure to allocate in crimp_to_fit
---
 src/util/Makefile.sources |   2 +
 src/util/string_buffer.c  | 148 ++
 src/util/string_buffer.h  | 104 
 3 files changed, 254 insertions(+)
 create mode 100644 src/util/string_buffer.c
 create mode 100644 src/util/string_buffer.h

diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index 4ed4e39f03..c7f6516a99 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -37,6 +37,8 @@ MESA_UTIL_FILES := \
simple_list.h \
slab.c \
slab.h \
+   string_buffer.c \
+   string_buffer.h \
strndup.h \
strtod.c \
strtod.h \
diff --git a/src/util/string_buffer.c b/src/util/string_buffer.c
new file mode 100644
index 00..c33173bfa0
--- /dev/null
+++ b/src/util/string_buffer.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "string_buffer.h"
+
+static bool
+ensure_capacity(struct _mesa_string_buffer *str, uint32_t needed_capacity)
+{
+   if (needed_capacity <= str->capacity)
+  return true;
+
+   /* Too small, double until we can fit the new string */
+   uint32_t new_capacity = str->capacity * 2;
+   while (needed_capacity > new_capacity)
+  new_capacity *= 2;
+
+   str->buf = reralloc_array_size(str, str->buf, sizeof(char), new_capacity);
+   if (str->buf == NULL)
+  return false;
+
+   str->capacity = new_capacity;
+   return true;
+}
+
+struct _mesa_string_buffer *
+_mesa_string_buffer_create(void *mem_ctx, uint32_t initial_capacity)
+{
+   struct _mesa_string_buffer *str;
+   str = ralloc(mem_ctx, struct _mesa_string_buffer);
+
+   if (str == NULL)
+  return NULL;
+
+   /* If no initial capacity is set then set it to something */
+   str->capacity = initial_capacity ? initial_capacity : 32;
+   str->buf = ralloc_array(str, char, str->capacity);
+
+   if (!str->buf) {
+  ralloc_free(str);
+  return NULL;
+   }
+
+   str->length = 0;
+   str->buf[str->length] = '\0';
+   return str;
+}
+
+bool
+_mesa_string_buffer_append_all(struct _mesa_string_buffer *str,
+   uint32_t num_args, ...)
+{
+   int i;
+   char* s;
+   va_list args;
+   v

[Mesa-dev] [PATCH] glcpp: Avoid unnecessary call to strlen

2017-09-14 Thread Thomas Helland
Length of the token was already calculated by flex and stored in yyleng,
no need to implicitly call strlen() via linear_strdup().

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Timothy Arceri 

V2: Also convert this pattern in glsl_lexer.ll

V3: Remove a misplaced comment

V4: Use a temporary char to avoid type change
Remove bogus +1 on length check of identifier
---
 src/compiler/glsl/glcpp/glcpp-lex.l |  9 -
 src/compiler/glsl/glsl_lexer.ll | 39 +
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l 
b/src/compiler/glsl/glcpp/glcpp-lex.l
index 381b97364a..9cfcc12022 100644
--- a/src/compiler/glsl/glcpp/glcpp-lex.l
+++ b/src/compiler/glsl/glcpp/glcpp-lex.l
@@ -101,7 +101,14 @@ void glcpp_set_column (int  column_no , yyscan_t 
yyscanner);
 #define RETURN_STRING_TOKEN(token) \
do {\
if (! parser->skipping) {   \
-   yylval->str = linear_strdup(yyextra->linalloc, yytext); 
\
+   /* We're not doing linear_strdup here, to avoid \
+* an implicit call on strlen() for the length  \
+* of the string, as this is already found by   \
+* flex and stored in yyleng */ \
+   void *mem_ctx = yyextra->linalloc;  \
+   yylval->str = linear_alloc_child(mem_ctx,   \
+yyleng + 1);   \
+   memcpy(yylval->str, yytext, yyleng + 1);\
RETURN_TOKEN_NEVER_SKIP (token);\
}   \
} while(0)
diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index 7c41455d98..56519bf92d 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -81,8 +81,13 @@ static int classify_identifier(struct _mesa_glsl_parse_state 
*, const char *);
  "illegal use of reserved word `%s'", yytext); \
 return ERROR_TOK;  \
   } else { \
-void *mem_ctx = yyextra->linalloc; 
\
-yylval->identifier = linear_strdup(mem_ctx, yytext);   \
+/* We're not doing linear_strdup here, to avoid an implicit\
+ * call on strlen() for the length of the string, as this is   \
+ * already found by flex and stored in yyleng */   \
+void *mem_ctx = yyextra->linalloc; \
+ char *id = (char *) linear_alloc_child(mem_ctx, yyleng + 1);   \
+ memcpy(id, yytext, yyleng + 1);\
+ yylval->identifier = id;   \
 return classify_identifier(yyextra, yytext);   \
   }
\
} while (0)
@@ -261,8 +266,14 @@ HASH   ^{SPC}#{SPC}
 [ \t\r]*   { }
 :  return COLON;
 [_a-zA-Z][_a-zA-Z0-9]* {
-  void *mem_ctx = yyextra->linalloc;
-  yylval->identifier = linear_strdup(mem_ctx, 
yytext);
+  /* We're not doing linear_strdup here, to 
avoid an implicit call
+   * on strlen() for the length of the string, 
as this is already
+   * found by flex and stored in yyleng
+   */
+void *mem_ctx = yyextra->linalloc;
+char *id = (char *) 
linear_alloc_child(mem_ctx, yyleng + 1);
+memcpy(id, yytext, yyleng + 1);
+yylval->identifier = id;
   return IDENTIFIER;
}
 [1-9][0-9]*{
@@ -449,8 +460,14 @@ layout {
   || yyextra->ARB_tessellation_shader_enable) {
  return LAYOUT_TOK;
   } else {
- void *mem_ctx = yyextra->linalloc;
- yylval->identifier = linear_strdup(mem_ctx, yytext);
+ /* We're not doing linear_strdup here, to avoid an 
implicit call
+  * on strlen() for the length of the string, as this is 
already
+  * found by flex and stored in yyleng
+  */
+  void *mem_ctx = yyextra->linalloc;
+   

Re: [Mesa-dev] [PATCH 5/5] glcpp: Avoid unnecessary call to strlen

2017-09-14 Thread Thomas Helland
2017-09-14 3:29 GMT+02:00 Ian Romanick <i...@freedesktop.org>:
> On 09/11/2017 01:21 PM, Thomas Helland wrote:
>> Length of the token was already calculated by flex and stored in yyleng,
>> no need to implicitly call strlen() via linear_strdup().
>>
>> Reviewed-by: Nicolai Hähnle 
>> Reviewed-by: Timothy Arceri <tarc...@itsqueeze.com>
>>
>> V2: Also convert this pattern in glsl_lexer.ll
>>
>> V3: Remove a misplaced comment
>> Fix compile warning from V2
>> ---
>>  src/compiler/glsl/glcpp/glcpp-lex.l |   9 +-
>>  src/compiler/glsl/glsl_lexer.ll |  32 -
>>  src/compiler/glsl/glsl_parser.yy| 246 
>> ++--
>>  3 files changed, 157 insertions(+), 130 deletions(-)
>>
>> diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l 
>> b/src/compiler/glsl/glcpp/glcpp-lex.l
>> index 381b97364a..9cfcc12022 100644
>> --- a/src/compiler/glsl/glcpp/glcpp-lex.l
>> +++ b/src/compiler/glsl/glcpp/glcpp-lex.l
>> @@ -101,7 +101,14 @@ void glcpp_set_column (int  column_no , yyscan_t 
>> yyscanner);
>>  #define RETURN_STRING_TOKEN(token)   \
>>   do {\
>>   if (! parser->skipping) {   \
>> - yylval->str = linear_strdup(yyextra->linalloc, 
>> yytext); \
>> + /* We're not doing linear_strdup here, to avoid \
>> +  * an implicit call on strlen() for the length  \
>> +  * of the string, as this is already found by   \
>> +  * flex and stored in yyleng */ \
>> + void *mem_ctx = yyextra->linalloc;  \
>> + yylval->str = linear_alloc_child(mem_ctx,   \
>> +  yyleng + 1);   \
>> + memcpy(yylval->str, yytext, yyleng + 1);\
>>   RETURN_TOKEN_NEVER_SKIP (token);\
>>   }   \
>>   } while(0)
>> diff --git a/src/compiler/glsl/glsl_lexer.ll 
>> b/src/compiler/glsl/glsl_lexer.ll
>> index 7c41455d98..3a67f0ea40 100644
>> --- a/src/compiler/glsl/glsl_lexer.ll
>> +++ b/src/compiler/glsl/glsl_lexer.ll
>> @@ -81,8 +81,13 @@ static int classify_identifier(struct 
>> _mesa_glsl_parse_state *, const char *);
>> "illegal use of reserved word `%s'", yytext); \
>>return ERROR_TOK;  \
>>} else {  
>>  \
>> -  void *mem_ctx = yyextra->linalloc;
>>  \
>> -  yylval->identifier = linear_strdup(mem_ctx, yytext);   \
>> +  /* We're not doing linear_strdup here, to avoid an implicit\
>> +   * call on strlen() for the length of the string, as this is   \
>> +   * already found by flex and stored in yyleng */   \
>> +  void *mem_ctx = yyextra->linalloc; \
>> +  yylval->identifier = (char *) linear_alloc_child(mem_ctx,  \
>> +   yyleng + 1);  \
>> +  memcpy(yylval->identifier, yytext, yyleng + 1);\
>
> Could this (here and below) be implemented as:
>
> char *id = (char *) linear_alloc_child(mem_ctx, yyleng + 1);
> memcpy(id, yytext, yyleng + 1);
> yylval->identifier = id;
>
> Then the type of identifier doesn't have to change, and the last hunk is
> unnecessary.
>

That should do the trick, not sure why I didn't think of that.
I'll roll up a reworked patch this evening, or during the weekend.

>>return classify_identifier(yyextra, yytext);   \
>>} 
>>  \
>> } while (0)
>> @@ -261,8 +266,13 @@ HASH ^{SPC}#{SPC}
>>  [ \t\r]* { }
>>  :return COLON;
>>  [_a-zA-Z][_a-zA-Z0-9]*   {
>> +/* We're not doing linear_strdup here, to 
>> avoid an implicit call
>> + * on strlen() for the length of the 
>> string, as this is already
>> + * found by flex and stored in yyleng
>> + 

Re: [Mesa-dev] [PATCH 5/5] glcpp: Avoid unnecessary call to strlen

2017-09-14 Thread Thomas Helland
2017-09-14 3:32 GMT+02:00 Ian Romanick <i...@freedesktop.org>:
> On 09/11/2017 01:21 PM, Thomas Helland wrote:
>> @@ -621,12 +636,17 @@ u64vec4 KEYWORD_WITH_ALT(0, 0, 0, 0, 
>> yyextra->ARB_gpu_shader_int64_enable, U64V
>>  [_a-zA-Z][_a-zA-Z0-9]*   {
>>   struct _mesa_glsl_parse_state *state = yyextra;
>>   void *ctx = state->linalloc;
>> - if (state->es_shader && strlen(yytext) > 1024) {
>> + if (state->es_shader && yyleng + 1 > 1024) {
>
> Also... I don't think this is right.  Shouldn't this just be 'yyleng > 1024'?
>

Yes, you are absolutely right. Not sure how this got to be like this.
Good job catching that!

>>  _mesa_glsl_error(yylloc, state,
>>   "Identifier `%s' exceeds 1024 
>> characters",
>>   yytext);
>>   } else {
>> -   yylval->identifier = linear_strdup(ctx, yytext);
>> +   /* We're not doing linear_strdup here, to avoid 
>> an implicit call
>> +* on strlen() for the length of the string, as 
>> this is already
>> +* found by flex and stored in yyleng
>> +*/
>> +   yylval->identifier = (char *) 
>> linear_alloc_child(ctx, yyleng + 1);
>> +   memcpy(yylval->identifier, yytext, yyleng + 1);
>>   }
>>   return classify_identifier(state, yytext);
>>   }
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] Resend of string buffer series

2017-09-14 Thread Thomas Helland
2017-09-14 5:54 GMT+02:00 Dieter Nützel <die...@nuetzel-hh.de>:
> Even for this series, too:
>
> Tested-by: Dieter Nützel <die...@nuetzel-hh.de>
>
> You 'lost' all of my former tb's.
>

Thanks!

Sorry for forgetting to add your TB. I'll get it done before merging.
I'll look into fixing the last review comments, get me an fd-o account
and git access, and get these merged sooner rather than later.

> Dieter
>
>
> Am 11.09.2017 22:21, schrieb Thomas Helland:
>>
>> I think I should have addressed all review feedback pointed out
>> to me by Nicolai, fixed the build issues with the tests (thanks
>> to Eric and Emil) and I've discovered another bug thanks to
>> strengthening the gtest test even more. I've also given it some
>> more polishing and slight modifications to make comments clearer
>> and more precise, and variable names better explaining their intent
>> in a couple of places. Details of changes in each patch. A big thanks
>> to those who have helped review this series =)
>>
>> Thomas Helland (5):
>>   util: Add a string buffer implementation
>>   util: Add tests for the string buffer
>>   glsl: Change the parser to use the string buffer
>>   glcpp: Use string_buffer for line continuation removal
>>   glcpp: Avoid unnecessary call to strlen
>>
>>  configure.ac   |   1 +
>>  src/compiler/glsl/glcpp/glcpp-lex.l|   9 +-
>>  src/compiler/glsl/glcpp/glcpp-parse.y  | 195 +---
>>  src/compiler/glsl/glcpp/glcpp.h|   8 +-
>>  src/compiler/glsl/glcpp/pp.c   |  64 +++---
>>  src/compiler/glsl/glsl_lexer.ll|  32 ++-
>>  src/compiler/glsl/glsl_parser.yy   | 246
>> ++---
>>  src/util/Makefile.am   |   5 +-
>>  src/util/Makefile.sources  |   2 +
>>  src/util/string_buffer.c   | 147 
>>  src/util/string_buffer.h   |  99 +
>>  src/util/tests/string_buffer/Makefile.am   |  40 
>>  .../tests/string_buffer/string_buffer_test.cpp | 119 ++
>>  13 files changed, 665 insertions(+), 302 deletions(-)
>>  create mode 100644 src/util/string_buffer.c
>>  create mode 100644 src/util/string_buffer.h
>>  create mode 100644 src/util/tests/string_buffer/Makefile.am
>>  create mode 100644 src/util/tests/string_buffer/string_buffer_test.cpp
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] util: Add tests for the string buffer

2017-09-11 Thread Thomas Helland
More tests could probably be added, but this should cover
concatenation, resizing, clearing, formatted printing,
and checking the length, so it should be quite complete.

V2: Address review feedback from Timothy, plus fixes
   - Use a large enough char array
   - Actually test the formatted appending
   - Test that clear function resets string length

V3: Port to gtest

V4: Fix test makefile
Fix copyright header
Fix missing extern C
Use more appropriate name for C-file
Add tests for append_char
---
 configure.ac   |   1 +
 src/util/Makefile.am   |   5 +-
 src/util/tests/string_buffer/Makefile.am   |  40 +++
 .../tests/string_buffer/string_buffer_test.cpp | 119 +
 4 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 src/util/tests/string_buffer/Makefile.am
 create mode 100644 src/util/tests/string_buffer/string_buffer_test.cpp

diff --git a/configure.ac b/configure.ac
index d0d4c0dfd1..20727c7bb4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2924,6 +2924,7 @@ AC_CONFIG_FILES([Makefile
  src/mesa/state_tracker/tests/Makefile
  src/util/Makefile
  src/util/tests/hash_table/Makefile
+ src/util/tests/string_buffer/Makefile
  src/util/xmlpool/Makefile
  src/vulkan/Makefile])
 
diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index 4512dc99d5..2b47143ad7 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -19,7 +19,10 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-SUBDIRS = xmlpool . tests/hash_table
+SUBDIRS = . \
+   xmlpool \
+   tests/hash_table \
+   tests/string_buffer
 
 include Makefile.sources
 
diff --git a/src/util/tests/string_buffer/Makefile.am 
b/src/util/tests/string_buffer/Makefile.am
new file mode 100644
index 00..bd04d86349
--- /dev/null
+++ b/src/util/tests/string_buffer/Makefile.am
@@ -0,0 +1,40 @@
+# Copyright © 2017 Thomas Helland
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom the
+#  Software is furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice (including the next
+#  paragraph) shall be included in all copies or substantial portions of the
+#  Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+#  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+#  IN THE SOFTWARE.
+
+AM_CPPFLAGS = \
+   -I$(top_srcdir)/src \
+   -I$(top_srcdir)/include \
+   -I$(top_srcdir)/src/gtest/include \
+   $(PTHREAD_CFLAGS) \
+   $(DEFINES)
+
+TESTS = string_buffer_test
+
+check_PROGRAMS = $(TESTS)
+
+string_buffer_test_SOURCES = \
+   string_buffer_test.cpp
+
+string_buffer_test_LDADD = \
+   $(top_builddir)/src/gtest/libgtest.la \
+   $(top_builddir)/src/util/libmesautil.la \
+   $(PTHREAD_LIBS) \
+   $(DLOPEN_LIBS)
diff --git a/src/util/tests/string_buffer/string_buffer_test.cpp 
b/src/util/tests/string_buffer/string_buffer_test.cpp
new file mode 100644
index 00..e80ee8b135
--- /dev/null
+++ b/src/util/tests/string_buffer/string_buffer_test.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR O

[Mesa-dev] [PATCH 1/5] util: Add a string buffer implementation

2017-09-11 Thread Thomas Helland
Based on Vladislav Egorov's work on the preprocessor, but split
out to a util functionality that should be universal. Setup, teardown,
memory handling and general layout is modeled around the hash_table
and the set, to make it familiar for everyone.

A notable change is that this implementation is always null terminated.
The rationale is that it will be less error-prone, as one might
access the buffer directly, thereby reading a non-terminated string.
Also, vsnprintf and friends print the null-terminator.

V2: Address review feedback from Timothy and Grazvydas
   - Fix MINGW preprocessor check
   - Changed len from uint to int
   - Make string argument const in append function
   - Move to header and inline append function
   - Add crimp_to_fit function for resizing buffer

V3: Move include of ralloc to string_buffer.h

V4: Use u_string.h for a cross-platform working vsnprintf

V5: Remember to cast to char * in crimp function

V6: Address review feedback from Nicolai
   - Handle !str->buf in buffer_create
   - Ensure va_end is always called in buffer_append_all
   - Add overflow check in buffer_append_len
   - Do not expose buffer_space_left, just remove it
   - Clarify why a loop is used in vprintf, change to for-loop
   - Add a va_copy to buffer_vprintf to fix failure to append arguments
 when having to resize the buffer for vsnprintf.
---
 src/util/Makefile.sources |   2 +
 src/util/string_buffer.c  | 147 ++
 src/util/string_buffer.h  |  99 +++
 3 files changed, 248 insertions(+)
 create mode 100644 src/util/string_buffer.c
 create mode 100644 src/util/string_buffer.h

diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index 4ed4e39f03..c7f6516a99 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -37,6 +37,8 @@ MESA_UTIL_FILES := \
simple_list.h \
slab.c \
slab.h \
+   string_buffer.c \
+   string_buffer.h \
strndup.h \
strtod.c \
strtod.h \
diff --git a/src/util/string_buffer.c b/src/util/string_buffer.c
new file mode 100644
index 00..e43824948f
--- /dev/null
+++ b/src/util/string_buffer.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "string_buffer.h"
+
+static bool
+ensure_capacity(struct _mesa_string_buffer *str, uint32_t needed_capacity)
+{
+   if (needed_capacity <= str->capacity)
+  return true;
+
+   /* Too small, double until we can fit the new string */
+   uint32_t new_capacity = str->capacity * 2;
+   while (needed_capacity > new_capacity)
+  new_capacity *= 2;
+
+   str->buf = reralloc_array_size(str, str->buf, sizeof(char), new_capacity);
+   if (str->buf == NULL)
+  return false;
+
+   str->capacity = new_capacity;
+   return true;
+}
+
+struct _mesa_string_buffer *
+_mesa_string_buffer_create(void *mem_ctx, uint32_t initial_capacity)
+{
+   struct _mesa_string_buffer *str;
+   str = ralloc(mem_ctx, struct _mesa_string_buffer);
+
+   if (str == NULL)
+  return NULL;
+
+   /* If no initial capacity is set then set it to something */
+   str->capacity = initial_capacity ? initial_capacity : 32;
+   str->buf = ralloc_array(str, char, str->capacity);
+
+   if (!str->buf) {
+  ralloc_free(str);
+  return NULL;
+   }
+
+   str->length = 0;
+   str->buf[str->length] = '\0';
+   return str;
+}
+
+bool
+_mesa_string_buffer_append_all(struct _mesa_string_buffer *str,
+   uint32_t num_args, ...)
+{
+   int i;
+   char* s;
+   va_list args;
+   va_start(args, num_args);
+   for (i = 0; i < num_args; i++) {
+  s = va_arg(args, char*);
+  if (!_mesa_string_buffer_append_len(str, s, strlen(s))) {
+ va_end(args);
+  

[Mesa-dev] [PATCH 4/5] glcpp: Use string_buffer for line continuation removal

2017-09-11 Thread Thomas Helland
Migrate removal of line continuations to string_buffer. Before this
it used ralloc_strncat() to append strings, which internally
calculates strlen() of its argument each time. Its argument is the
entire shader, so it scans the whole shader text multiple times.

Signed-off-by: Vladislav Egorov <vegorov...@gmail.com>

Reviewed-by: Nicolai Hähnle 

V2: Adapt to different API of string buffer (Thomas Helland)
---
 src/compiler/glsl/glcpp/pp.c | 25 ++---
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/compiler/glsl/glcpp/pp.c b/src/compiler/glsl/glcpp/pp.c
index 861f6e8669..fab52227a0 100644
--- a/src/compiler/glsl/glcpp/pp.c
+++ b/src/compiler/glsl/glcpp/pp.c
@@ -97,17 +97,25 @@ skip_newline (const char *str)
return ret;
 }
 
+/* Initial output buffer size, 4096 minus ralloc() overhead. It was selected
+ * to minimize total amount of allocated memory during shader-db run.
+ */
+#define INITIAL_PP_OUTPUT_BUF_SIZE 4048
+
 /* Remove any line continuation characters in the shader, (whether in
  * preprocessing directives or in GLSL code).
  */
 static char *
 remove_line_continuations(glcpp_parser_t *ctx, const char *shader)
 {
-   char *clean = ralloc_strdup(ctx, "");
+   struct _mesa_string_buffer *sb =
+   _mesa_string_buffer_create(ctx, INITIAL_PP_OUTPUT_BUF_SIZE);
+
const char *backslash, *newline, *search_start;
 const char *cr, *lf;
 char newline_separator[3];
int collapsed_newlines = 0;
+   int separator_len;
 
backslash = strchr(shader, '\\');
 
@@ -153,6 +161,7 @@ remove_line_continuations(glcpp_parser_t *ctx, const char 
*shader)
newline_separator[0] = '\n';
newline_separator[1] = '\r';
}
+   separator_len = strlen(newline_separator);
 
while (true) {
/* If we have previously collapsed any line-continuations,
@@ -172,10 +181,12 @@ remove_line_continuations(glcpp_parser_t *ctx, const char 
*shader)
if (newline &&
(backslash == NULL || newline < backslash))
{
-   ralloc_strncat(, shader,
-  newline - shader + 1);
+   _mesa_string_buffer_append_len(sb, shader,
+  newline - shader 
+ 1);
while (collapsed_newlines) {
-   ralloc_strcat(, 
newline_separator);
+   _mesa_string_buffer_append_len(sb,
+  
newline_separator,
+  
separator_len);
collapsed_newlines--;
}
shader = skip_newline (newline);
@@ -196,7 +207,7 @@ remove_line_continuations(glcpp_parser_t *ctx, const char 
*shader)
if (backslash[1] == '\r' || backslash[1] == '\n')
{
collapsed_newlines++;
-   ralloc_strncat(, shader, backslash - shader);
+   _mesa_string_buffer_append_len(sb, shader, backslash - 
shader);
shader = skip_newline (backslash + 1);
search_start = shader;
}
@@ -204,9 +215,9 @@ remove_line_continuations(glcpp_parser_t *ctx, const char 
*shader)
backslash = strchr(search_start, '\\');
}
 
-   ralloc_strcat(, shader);
+   _mesa_string_buffer_append(sb, shader);
 
-   return clean;
+   return sb->buf;
 }
 
 int
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/5] Resend of string buffer series

2017-09-11 Thread Thomas Helland
I think I should have addressed all review feedback pointed out
to me by Nicolai, fixed the build issues with the tests (thanks
to Eric and Emil) and I've discovered another bug thanks to
strengthening the gtest test even more. I've also given it some
more polishing and slight modifications to make comments clearer
and more precise, and variable names better explaining their intent
in a couple of places. Details of changes are in each patch. A big thanks
to those who have helped review this series =)

Thomas Helland (5):
  util: Add a string buffer implementation
  util: Add tests for the string buffer
  glsl: Change the parser to use the string buffer
  glcpp: Use string_buffer for line continuation removal
  glcpp: Avoid unnecessary call to strlen

 configure.ac   |   1 +
 src/compiler/glsl/glcpp/glcpp-lex.l|   9 +-
 src/compiler/glsl/glcpp/glcpp-parse.y  | 195 +---
 src/compiler/glsl/glcpp/glcpp.h|   8 +-
 src/compiler/glsl/glcpp/pp.c   |  64 +++---
 src/compiler/glsl/glsl_lexer.ll|  32 ++-
 src/compiler/glsl/glsl_parser.yy   | 246 ++---
 src/util/Makefile.am   |   5 +-
 src/util/Makefile.sources  |   2 +
 src/util/string_buffer.c   | 147 
 src/util/string_buffer.h   |  99 +
 src/util/tests/string_buffer/Makefile.am   |  40 
 .../tests/string_buffer/string_buffer_test.cpp | 119 ++
 13 files changed, 665 insertions(+), 302 deletions(-)
 create mode 100644 src/util/string_buffer.c
 create mode 100644 src/util/string_buffer.h
 create mode 100644 src/util/tests/string_buffer/Makefile.am
 create mode 100644 src/util/tests/string_buffer/string_buffer_test.cpp

-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] glcpp: Avoid unnecessary call to strlen

2017-09-11 Thread Thomas Helland
Length of the token was already calculated by flex and stored in yyleng,
no need to implicitly call strlen() via linear_strdup().

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Timothy Arceri 

V2: Also convert this pattern in glsl_lexer.ll

V3: Remove a misplaced comment
Fix compile warning from V2
---
 src/compiler/glsl/glcpp/glcpp-lex.l |   9 +-
 src/compiler/glsl/glsl_lexer.ll |  32 -
 src/compiler/glsl/glsl_parser.yy| 246 ++--
 3 files changed, 157 insertions(+), 130 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l 
b/src/compiler/glsl/glcpp/glcpp-lex.l
index 381b97364a..9cfcc12022 100644
--- a/src/compiler/glsl/glcpp/glcpp-lex.l
+++ b/src/compiler/glsl/glcpp/glcpp-lex.l
@@ -101,7 +101,14 @@ void glcpp_set_column (int  column_no , yyscan_t 
yyscanner);
 #define RETURN_STRING_TOKEN(token) \
do {\
if (! parser->skipping) {   \
-   yylval->str = linear_strdup(yyextra->linalloc, yytext); 
\
+   /* We're not doing linear_strdup here, to avoid \
+* an implicit call on strlen() for the length  \
+* of the string, as this is already found by   \
+* flex and stored in yyleng */ \
+   void *mem_ctx = yyextra->linalloc;  \
+   yylval->str = linear_alloc_child(mem_ctx,   \
+yyleng + 1);   \
+   memcpy(yylval->str, yytext, yyleng + 1);\
RETURN_TOKEN_NEVER_SKIP (token);\
}   \
} while(0)
diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index 7c41455d98..3a67f0ea40 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -81,8 +81,13 @@ static int classify_identifier(struct _mesa_glsl_parse_state 
*, const char *);
  "illegal use of reserved word `%s'", yytext); \
 return ERROR_TOK;  \
   } else { \
-void *mem_ctx = yyextra->linalloc; 
\
-yylval->identifier = linear_strdup(mem_ctx, yytext);   \
+/* We're not doing linear_strdup here, to avoid an implicit\
+ * call on strlen() for the length of the string, as this is   \
+ * already found by flex and stored in yyleng */   \
+void *mem_ctx = yyextra->linalloc; \
+yylval->identifier = (char *) linear_alloc_child(mem_ctx,  \
+ yyleng + 1);  \
+memcpy(yylval->identifier, yytext, yyleng + 1);\
 return classify_identifier(yyextra, yytext);   \
   }
\
} while (0)
@@ -261,8 +266,13 @@ HASH   ^{SPC}#{SPC}
 [ \t\r]*   { }
 :  return COLON;
 [_a-zA-Z][_a-zA-Z0-9]* {
+  /* We're not doing linear_strdup here, to 
avoid an implicit call
+   * on strlen() for the length of the string, 
as this is already
+   * found by flex and stored in yyleng
+   */
   void *mem_ctx = yyextra->linalloc;
-  yylval->identifier = linear_strdup(mem_ctx, 
yytext);
+  yylval->identifier = (char *) 
linear_alloc_child(mem_ctx, yyleng + 1);
+  memcpy(yylval->identifier, yytext, yyleng + 
1);
   return IDENTIFIER;
}
 [1-9][0-9]*{
@@ -449,8 +459,13 @@ layout {
   || yyextra->ARB_tessellation_shader_enable) {
  return LAYOUT_TOK;
   } else {
+ /* We're not doing linear_strdup here, to avoid an 
implicit call
+  * on strlen() for the length of the string, as this is 
already
+  * found by flex and stored in yyleng
+  */
  void *mem_ctx = yyextra->linalloc;
- yylval->identifier = linear_strdup(mem_ctx, yytext);
+ yylval->identifier = (char *) linear_alloc_child(mem_ctx, 
yyleng + 1);
+ memcpy(yylval->identifier, yytext, yyleng + 1);
  return 

[Mesa-dev] [PATCH 3/5] glsl: Change the parser to use the string buffer

2017-09-11 Thread Thomas Helland
Reviewed-by: Nicolai Hähnle 

V2: Pointed out by Timothy
   - Fix pp.c reralloc size issue and comment

V3 - Use vprintf instead of printf where we should
   - Fixes failing make-check tests

V4 - Use buffer_append_char in a couple more places
---
 src/compiler/glsl/glcpp/glcpp-parse.y | 195 ++
 src/compiler/glsl/glcpp/glcpp.h   |   8 +-
 src/compiler/glsl/glcpp/pp.c  |  39 +++
 3 files changed, 78 insertions(+), 164 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index 898a26044f..913bce1fde 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -209,12 +209,7 @@ line:
 |  SPACE control_line
 |  text_line {
_glcpp_parser_print_expanded_token_list (parser, $1);
-   const char *newline_str = "\n";
-   size_t size = strlen(newline_str);
-
-   ralloc_str_append(>output, newline_str,
- parser->output_length, size);
-   parser->output_length += size;
+   _mesa_string_buffer_append_char(parser->output, '\n');
}
 |  expanded_line
 ;
@@ -233,20 +228,16 @@ expanded_line:
 |  LINE_EXPANDED integer_constant NEWLINE {
parser->has_new_line_number = 1;
parser->new_line_number = $2;
-   ralloc_asprintf_rewrite_tail (>output,
- >output_length,
- "#line %" PRIiMAX "\n",
- $2);
+   _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX 
"\n", $2);
}
 |  LINE_EXPANDED integer_constant integer_constant NEWLINE {
parser->has_new_line_number = 1;
parser->new_line_number = $2;
parser->has_new_source_number = 1;
parser->new_source_number = $3;
-   ralloc_asprintf_rewrite_tail (>output,
- >output_length,
- "#line %" PRIiMAX " %" PRIiMAX 
"\n",
- $2, $3);
+   _mesa_string_buffer_printf(parser->output,
+  "#line %" PRIiMAX " %" PRIiMAX "\n",
+   $2, $3);
}
 ;
 
@@ -264,12 +255,7 @@ define:
 
 control_line:
control_line_success {
-   const char *newline_str = "\n";
-   size_t size = strlen(newline_str);
-
-   ralloc_str_append(>output, newline_str,
- parser->output_length, size);
-   parser->output_length += size;
+   _mesa_string_buffer_append_char(parser->output, '\n');
}
 |  control_line_error
 |  HASH_TOKEN LINE pp_tokens NEWLINE {
@@ -459,7 +445,7 @@ control_line_success:
glcpp_parser_resolve_implicit_version(parser);
}
 |  HASH_TOKEN PRAGMA NEWLINE {
-   ralloc_asprintf_rewrite_tail (>output, 
>output_length, "#%s", $2);
+   _mesa_string_buffer_printf(parser->output, "#%s", $2);
}
 ;
 
@@ -1137,133 +1123,61 @@ _token_list_equal_ignoring_space(token_list_t *a, 
token_list_t *b)
 }
 
 static void
-_token_print(char **out, size_t *len, token_t *token)
+_token_print(struct _mesa_string_buffer *out, token_t *token)
 {
if (token->type < 256) {
-  size_t size = sizeof(char);
-
-  ralloc_str_append(out, (char *) >type, *len, size);
-  *len += size;
+  _mesa_string_buffer_append_char(out, token->type);
   return;
}
 
switch (token->type) {
case INTEGER:
-  ralloc_asprintf_rewrite_tail (out, len, "%" PRIiMAX, token->value.ival);
+  _mesa_string_buffer_printf(out, "%" PRIiMAX, token->value.ival);
   break;
case IDENTIFIER:
case INTEGER_STRING:
-   case OTHER: {
-  size_t size = strlen(token->value.str);
-
-  ralloc_str_append(out, token->value.str, *len, size);
-  *len += size;
+   case OTHER:
+  _mesa_string_buffer_append(out, token->value.str);
   break;
-   }
-   case SPACE: {
-  const char *token_str = " ";
-  size_t size = strlen(token_str);
-
-  ralloc_str_append(out, token_str, *len, size);
-  *len += size;
+   case SPACE:
+  _mesa_string_buffer_append_char(out, ' ');
   break;
-   }
-   case LEFT_SHIFT: {
-  const char *token_str = "<<";
-  size_t size = strlen(token_str);
-
-  ralloc_str_append(out, token_str, *len, size);
-  *len += size;
+   case LEFT_SHIFT:
+  _mesa_string_buffer_append(out, "<<");
   break;
-   }
-   case RIGHT_SHIFT: {
-  const char *token_str = ">>";
-  size_t size = strlen(token_str);
-
-  ralloc_str_append(out, token_str, *len, size);
-  *len += size;
+   case RIGHT_SHIFT:
+  _mesa_string_buffer_append(out, 

Re: [Mesa-dev] [PATCH 0/8] Resend of preprocessor series

2017-09-06 Thread Thomas Helland
I'm busy until Sunday, but I'll see if I can find the time
to address Nicolai's comments on Sunday evening.
I've addressed the build issues with the tests, and the
comment about using util_vsnprintf, so it's getting there.
I've also done some general polishing on comments, etc.



6. sep. 2017 23.00 skrev "Dieter Nützel" <die...@nuetzel-hh.de>:

For the series:

Tested-by: Dieter Nützel <die...@nuetzel-hh.de>

But it does NOT apply on current git any longer.
With Nicolai's comments addressed, is a new version underway? ;-)

Dieter


Am 29.08.2017 21:56, schrieb Thomas Helland:

> This is a resend of the string buffer implementation and
> related patches sent out back in May. I've done one more
> change to the string buffer; using u_string.h for a compatible
> vsnprintf version to reduce the code even more. I've not been
> able to test this due to two build breakages (xmlpool and dri)
> that I'm still trying to figure out. But since I promised
> to send these out this evening, I'm sending them untested.
> I did test them thoroughly the last time around though,
> so I believe it should be mostly good as long as I haven't
> messed up the rebasing. I believe the string buffer part of
> the series is the most important; the rest I've not really
> gotten around to performance test much.
>
> Thomas Helland (7):
>   util: Add a string buffer implementation
>   util: Add tests for the string buffer
>   glsl: Change the parser to use the string buffer
>   glcpp: Use string_buffer for line continuation removal
>   glcpp: Avoid unnecessary call to strlen
>   port to gtest
>   fix test makefile
>
> Vladislav Egorov (1):
>   glcpp: Use Bloom filter before identifier search
>
>  configure.ac  |   2 +
>  src/compiler/glsl/glcpp/glcpp-lex.l   |   3 +-
>  src/compiler/glsl/glcpp/glcpp-parse.y | 219
> -
>  src/compiler/glsl/glcpp/glcpp.h   |  18 +-
>  src/compiler/glsl/glcpp/pp.c  |  64 ---
>  src/util/Makefile.am  |   3 +-
>  src/util/Makefile.sources |   2 +
>  src/util/string_buffer.c  | 155 +++
>  src/util/string_buffer.h  |  87 +
>  src/util/tests/string_buffer/Makefile.am  |  38 
>  src/util/tests/string_buffer/append_and_print.cpp | 221
> ++
>  11 files changed, 633 insertions(+), 179 deletions(-)
>  create mode 100644 src/util/string_buffer.c
>  create mode 100644 src/util/string_buffer.h
>  create mode 100644 src/util/tests/string_buffer/Makefile.am
>  create mode 100644 src/util/tests/string_buffer/append_and_print.cpp
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/mtypes: repack gl_texture_object.

2017-09-03 Thread Thomas Helland
2017-09-03 13:39 GMT+02:00 Dave Airlie <airl...@gmail.com>:
> On 3 September 2017 at 21:22, Thomas Helland <thomashellan...@gmail.com> 
> wrote:
>> 2017-09-03 13:18 GMT+02:00 Dave Airlie <airl...@gmail.com>:
>>> From: Dave Airlie <airl...@redhat.com>
>>>
>>> reduces size from 1144 to 1128.
>>>
>>> Signed-off-by: Dave Airlie <airl...@redhat.com>
>>> ---
>>>  src/mesa/main/mtypes.h | 10 +-
>>>  1 file changed, 5 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
>>> index d44897b..3d68a6d 100644
>>> --- a/src/mesa/main/mtypes.h
>>> +++ b/src/mesa/main/mtypes.h
>>> @@ -1012,7 +1012,6 @@ struct gl_texture_object
>>> struct gl_sampler_object Sampler;
>>>
>>> GLenum DepthMode;   /**< GL_ARB_depth_texture */
>>> -   bool StencilSampling;   /**< Should we sample stencil instead of 
>>> depth? */
>>>
>>> GLfloat Priority;   /**< in [0,1] */
>>> GLint BaseLevel;/**< min mipmap level, OpenGL 1.2 */
>>> @@ -1033,12 +1032,17 @@ struct gl_texture_object
>>> GLboolean Immutable;/**< GL_ARB_texture_storage */
>>> GLboolean _IsFloat; /**< GL_OES_float_texture */
>>> GLboolean _IsHalfFloat; /**< GL_OES_half_float_texture */
>>> +   bool StencilSampling;   /**< Should we sample stencil instead of 
>>> depth? */
>>> +   bool HandleAllocated;   /**< GL_ARB_bindless_texture */
>>>
>>
>> Maybe we could use "pragma pack" here instead?
>> I'm debating with myself whether or not moving this
>> bool away from the rest of the bindless_texture related
>> variables is worth saving the few bytes.
>
> You don't ever want pragma pack for this. As that will force a
> uint32_t after a bool to be misaligned,
> which would suck for everyone.
>
> Saving 7 bytes of pointless padding at the end of a struct that gets
> allocated quite a lot by GL applications,
> seems worth it for me. gl_texture_object is probably one of the most
> allocated application object structs.
>
> Dave.
>

I thought I remembered something about unaligned access
being not that painful on post-Haswell x86. But hey, we've got
other architectures to support that are not that fortunate.
I had initially dropped this patch from a similar patch series
I have sitting locally due to not being sure of the benefit.
But the change looks good and the rationale is sound, so:

Reviewed-by: Thomas Helland<thomashellan...@gmail.com>

>>
>>> GLuint MinLevel;/**< GL_ARB_texture_view */
>>> GLuint MinLayer;/**< GL_ARB_texture_view */
>>> GLuint NumLevels;   /**< GL_ARB_texture_view */
>>> GLuint NumLayers;   /**< GL_ARB_texture_view */
>>>
>>> +   /** GL_EXT_memory_object */
>>> +   GLenum TextureTiling;
>>> +
>>> /** Actual texture images, indexed by [cube face] and [mipmap level] */
>>> struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS];
>>>
>>> @@ -1057,13 +1061,9 @@ struct gl_texture_object
>>> /** GL_ARB_shader_image_load_store */
>>> GLenum ImageFormatCompatibilityType;
>>>
>>> -   /** GL_EXT_memory_object */
>>> -   GLenum TextureTiling;
>>> -
>>> /** GL_ARB_bindless_texture */
>>> struct util_dynarray SamplerHandles;
>>> struct util_dynarray ImageHandles;
>>> -   bool HandleAllocated;
>>>  };
>>>
>>>
>>> --
>>> 2.9.5
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/mtypes: repack gl_sampler_object.

2017-09-03 Thread Thomas Helland
Reviewed-by: Thomas Helland<thomashellan...@gmail.com>

2017-09-03 13:21 GMT+02:00 Dave Airlie <airl...@gmail.com>:
> From: Dave Airlie <airl...@redhat.com>
>
> 160->152.
>
> Signed-off-by: Dave Airlie <airl...@redhat.com>
> ---
>  src/mesa/main/mtypes.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 3d68a6d..db9ea76 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -990,8 +990,8 @@ struct gl_sampler_object
> GLboolean CubeMapSeamless;   /**< GL_AMD_seamless_cubemap_per_texture */
>
> /** GL_ARB_bindless_texture */
> -   struct util_dynarray Handles;
> bool HandleAllocated;
> +   struct util_dynarray Handles;
>  };
>
>
> --
> 2.9.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   3   4   5   6   >