Re: [GSoC] generation of GCC expression trees from isl ast expressions

Tobias Grosser Tue, 01 Jul 2014 09:41:35 -0700

On 01/07/2014 14:53, Roman Gareev wrote:

Hi Tobias,


could you please advise me how to verify the results of gimple code
generation?


More comments inline, but here something on a very high level.

I personally like testing already on the GIMPLE level and could see us
matching for certain expressions in the dumped gimple output.
Unfortunately this kind of testing may be a little fragile depending how
often gcc changes its internal dumping (hopefully not too often). On the
other side, in gcc testing is commonly done by compiling and executing
files. For this to work, we would need at least a simple implementation
of body statements before we can get anything tested and checked in.

I've written the first draft of the generation of loops
with empty bodies and tried to verify gimple code using the
representation, which is dumped at the end of the generation of the
dump_file. If we consider the following example, we'll see that cloog
and isl code generator generate similar representation (representation
generated by isl code generator doesn't have body of the loop, as was
expected).

int
main (int n, int *a)
{
   int i;

   for (i = 0; i < 100; i++)
     a[i] = i;

  return 0;
}

gcc/graphite-isl-ast-to-gimple.c

loop_0 (header = 0, latch = 1, niter = )
{
   bb_2 (preds = {bb_0 }, succs = {bb_3 })
   {
     <bb 2>:

   }
   bb_5 (preds = {bb_3 }, succs = {bb_1 })
   {
     <bb 5>:
     # .MEM_10 = PHI <.MEM_3(D)(3)>
     # VUSE <.MEM_10>
     return 0;

   }
   loop_2 (header = 3, latch = 4, niter = )
   {
     bb_3 (preds = {bb_2 bb_4 }, succs = {bb_4 bb_5 })
     {
       <bb 3>:
       # graphite_IV.3_1 = PHI <0(2), graphite_IV.3_14(4)>
       graphite_IV.3_14 = graphite_IV.3_1 + 1;
       if (graphite_IV.3_1 < 99)
         goto <bb 4>;
       else
         goto <bb 5>;

     }
     bb_4 (preds = {bb_3 }, succs = {bb_3 })
     {
       <bb 4>:
       goto <bb 3>;

     }
   }
}

graphite-clast-to-gimple.c

loop_0 (header = 0, latch = 1, niter = )
{
   bb_2 (preds = {bb_0 }, succs = {bb_3 })
   {
     <bb 2>:

   }
   bb_5 (preds = {bb_3 }, succs = {bb_1 })
   {
     <bb 5>:
     # .MEM_18 = PHI <.MEM_11(3)>
     # VUSE <.MEM_18>
     return 0;

   }
   loop_2 (header = 3, latch = 4, niter = )
   {
     bb_3 (preds = {bb_2 bb_4 }, succs = {bb_4 bb_5 })
     {
       <bb 3>:
       # graphite_IV.3_1 = PHI <0(2), graphite_IV.3_14(4)>
       # .MEM_19 = PHI <.MEM_3(D)(2), .MEM_11(4)>
       _2 = (sizetype) graphite_IV.3_1;
       _15 = _2 * 4;
       _16 = a_6(D) + _15;
       _17 = (int) graphite_IV.3_1;
       # .MEM_11 = VDEF <.MEM_19>
       *_16 = _17;
       graphite_IV.3_14 = graphite_IV.3_1 + 1;
       if (graphite_IV.3_1 < 99)
         goto <bb 4>;
       else
         goto <bb 5>;

     }
     bb_4 (preds = {bb_3 }, succs = {bb_3 })
     {
       <bb 4>:
       goto <bb 3>;

     }
   }
}

However, this form doesn't have loop guards which are generated by
graphite_create_new_loop_guard in gcc/graphite-isl-ast-to-gimple.c and
by graphite_create_new_loop_guard in graphite-clast-to-gimple.c.


Maybe the guards are directly constant folded? Can you try with:

 int
 main (int n, int *a)
 {
    int i;

    for (i = 0; i < b; i++)
      a[i] = i;

   return 0;
 }

Below is the code of this generation (It still uses isl_int for
generation of isl_expr_int, because the error related to isl/val_gmp.h
still arises. I've tried to use isl 0.12.2 and 0.13, but gotten the
same error).


Did using 'extern "C"' around the include statement not help?

+/* Stores the INDEX in a vector and the loop nesting LEVEL for a given
+   isl_id NAME.  BOUND_ONE and BOUND_TWO represent the exact lower and
+   upper bounds that can be inferred from the polyhedral representation.  */


Why do you mention BOUND_ONE & BOUND_TWO? I do not see any use of them?

+typedef struct ast_isl_name_index {
+  int index;
+  int level;
+  const char *name;
+  /* If free_name is set, the content of name was allocated by us and needs
+     to be freed.  */
+  char *free_name;
+} *ast_isl_name_index_p;
+
+/* Helper for hashing ast_isl_name_index.  */
+
+struct ast_isl_index_hasher
+{
+  typedef ast_isl_name_index value_type;
+  typedef ast_isl_name_index compare_type;
+  static inline hashval_t hash (const value_type *);
+  static inline bool equal (const value_type *, const compare_type *);
+  static inline void remove (value_type *);
+};
+
+/* Computes a hash function for database element E.  */
+
+inline hashval_t
+ast_isl_index_hasher::hash (const value_type *e)
+{
+  hashval_t hash = 0;
+
+  int length = strlen (e->name);
+  int i;
+
+  for (i = 0; i < length; ++i)
+    hash = hash | (e->name[i] << (i % 4));
+
+  return hash;
+}
+
+/* Compares database elements ELT1 and ELT2.  */
+
+inline bool
+ast_isl_index_hasher::equal (const value_type *elt1, const compare_type *elt2)
+{
+  return strcmp (elt1->name, elt2->name) == 0;
+}
+
+/* Free the memory taken by a ast_isl_name_index struct.  */
+
+inline void
+ast_isl_index_hasher::remove (value_type *c)
+{
+  if (c->free_name)
+    free (c->free_name);
+  free (c);
+}
+
+typedef hash_table<ast_isl_index_hasher> ast_isl_index_htab_type;
+
+/* Returns a pointer to a new element of type ast_isl_name_index_p built
+   from NAME, INDEX, LEVEL.  */
+
+static inline ast_isl_name_index_p
+new_ast_isl_name_index (const char *name, int index, int level)
+{
+  ast_isl_name_index_p res = XNEW (struct ast_isl_name_index);
+  char *new_name = XNEWVEC (char, strlen (name) + 1);
+  strcpy (new_name, name);
+
+  res->name = new_name;
+  res->free_name = new_name;
+  res->level = level;
+  res->index = index;
+  return res;
+}
+
+/* For a given name of the isl_id, which is stored in the isl_ast_expr EXPR_ID,
+   returns -1 if it does not correspond to any parameter, or otherwise, returns
+   the index in the PARAMS or SCATTERING_DIMENSIONS vector.  */
+
+static inline int
+ast_expr_id_to_index (__isl_keep isl_ast_expr *expr_id,
+                     ast_isl_index_htab_type *index_table)
+{
+  struct ast_isl_name_index tmp;
+  ast_isl_name_index **slot;
+
+  isl_id *tmp_isl_id = isl_ast_expr_get_id (expr_id);
+  tmp.name = isl_id_get_name (tmp_isl_id);
+  tmp.free_name = NULL;
+
+  slot = index_table->find_slot (&tmp, NO_INSERT);
+
+  isl_id_free (tmp_isl_id);
+  if (slot && *slot)
+    return (*slot)->index;
+
+  return -1;
+}
+
+/* Records in INDEX_TABLE the INDEX and LEVEL for NAME.  */
+
+static inline void
+save_isl_id_name_index (ast_isl_index_htab_type *index_table,
+                       const char *name, int index, int level)
+{
+  struct ast_isl_name_index tmp;
+  ast_isl_name_index **slot;
+
+  tmp.name = name;
+  tmp.free_name = NULL;
+  slot = index_table->find_slot (&tmp, INSERT);
+
+  if (slot)
+    {
+      free (*slot);
+      *slot = new_ast_isl_name_index (name, index, level);
+    }
+}


Any reason this is not a simpel std::map<isl_id *, tree>, but you instead
have this manually implemented hash table? Does gcc have a policy that
forbids std::map?

+
+/* NEWIVS_INDEX binds ISL's scattering name to the index of the tree
+   induction variable in NEWIVS.
+
+   PARAMS_INDEX binds ISL's parameter name to the index of the tree
+   parameter in PARAMS.  */
+
+typedef struct ivs_params {
+  vec<tree> params, *newivs;

This may also not be needed if we have the std::map<>

+  ast_isl_index_htab_type *newivs_index, *params_index;


Is there a reason we have separat newivs_index and params_index maps?
This was necessary for CLooG as far as I remember, but for isl a simple
isl_id -> tree map should be sufficient, no?

+  sese region;
+} *ivs_params_p;

+/* Converts a isl_ast_expr_op expression E to a GCC expression tree of

an isl_...

+static edge
+translate_isl_ast (loop_p context_loop, __isl_keep isl_ast_node *node,
+                  edge next_e, int level, ivs_params_p ip);
+
+/* Create the loop for a isl_ast_node_for.
+
+   - NEXT_E is the edge where new generated code should be attached.  */
+
+static edge
+translate_isl_ast_for_loop (loop_p context_loop,
+                           __isl_keep isl_ast_node *node_for, edge next_e,
+                           int level, tree type, tree lb, tree ub,


Is there aneed for the level at all? I think in the current
implementation and for isl in general it may not be needed as we have
isl_ids to find the corresponding loop ivs.

+                           ivs_params_p ip)
+{
+  gcc_assert (isl_ast_node_get_type (node_for) == isl_ast_node_for);
+  struct loop *loop = graphite_create_new_loop (next_e, node_for, context_loop,
+                                               type, lb, ub, level, ip);
+  edge last_e = single_exit (loop);
+  edge to_body = single_succ_edge (loop->header);
+  basic_block after = to_body->dest;
+
+  /* Create a basic block for loop close phi nodes.  */
+  last_e = single_succ_edge (split_edge (last_e));
+
+  /* Translate the body of the loop.  */
+  isl_ast_node *for_body = isl_ast_node_for_get_body (node_for);
+  next_e = translate_isl_ast (loop, for_body, to_body,
+                             level + 1, ip);
+  isl_ast_node_free (for_body);
+  redirect_edge_succ_nodup (next_e, after);
+  set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
+
+  /* TODO: Add checking for the loop parallelism.  */
+
+  return last_e;
+}
+
+/* Get upper bound from the cond of the for */


Please explain why we need a special function to get the upper bound.
Specifically, explain that isl in some configurations can generate loop
exit conditions such as:

for (i = ?; i + 2 >= 3 && 3 <= i + m; i++)
  ;

This get upper bound assumes a certain shape of loop exit condition(

for (i = ?; i < expr; i++)

Also, you need to set the option --no-isl-ast-build-atomic-upper-bound
in isl_ast_build to be able to rely on this behavior.
(You did this below)

Do you have a specific motivation, why you don't want to support
arbitrary expressions? I assume this is necessary for the if - do-while
optimization. If this is the case please state this.

+static __isl_give isl_ast_expr *
+get_upper_bound (__isl_keep isl_ast_node *node_for)
+{
+  gcc_assert (isl_ast_node_get_type (node_for) == isl_ast_node_for);
+  isl_ast_expr *for_cond = isl_ast_node_for_get_cond (node_for);
+  gcc_assert (isl_ast_expr_get_type (for_cond) == isl_ast_expr_op);
+  isl_ast_expr *res;
+  switch (isl_ast_expr_get_op_type (for_cond))
+    {
+    case isl_ast_op_le:
+      res = isl_ast_expr_get_op_arg (for_cond, 1);
+      break;
+
+    case isl_ast_op_lt:
+      {
+        // (iteraotr < ub) => (iterator <= ub - 1)


               iterator

+        isl_val *one = isl_val_int_from_si(isl_ast_expr_get_ctx (for_cond), 1);
+        isl_ast_expr *ub = isl_ast_expr_get_op_arg (for_cond, 1);
+        res = isl_ast_expr_sub (ub, isl_ast_expr_from_val (one));
+        break;
+      }
+
+    default:
+      gcc_unreachable ();
+    }
+  isl_ast_expr_free (for_cond);
+  return res;
+}

+/* Creates a new if region protecting the loop to be executed, if the execution
+   count is zero (lb > ub).  */

+static edge
+graphite_create_new_loop_guard (edge entry_edge,
+                               __isl_keep isl_ast_node *node_for, tree *type,
+                               tree *lb, tree *ub, ivs_params_p ip)
+{
+  gcc_assert (isl_ast_node_get_type (node_for) == isl_ast_node_for);
+  tree cond_expr;
+  edge exit_edge;
+
+  *type = build_nonstandard_integer_type (128, 0);


So you are hardcoding 128 bit? In this case make this please a proper
static global constant.

+  isl_ast_expr *for_init = isl_ast_node_for_get_init (node_for);
+  *lb = gcc_expression_from_isl_expression (*type, for_init, ip);
+  isl_ast_expr_free (for_init);
+  isl_ast_expr *upper_bound = get_upper_bound (node_for);
+  *ub = gcc_expression_from_isl_expression (*type, upper_bound, ip);
+  isl_ast_expr_free (upper_bound);
+
+  /* When ub is simply a constant or a parameter, use lb <= ub.  */
+  if (TREE_CODE (*ub) == INTEGER_CST || TREE_CODE (*ub) == SSA_NAME)
+    cond_expr = fold_build2 (LE_EXPR, boolean_type_node, *lb, *ub);
+  else
+    {
+      tree one = (POINTER_TYPE_P (*type)
+                 ? convert_to_ptrofftype (integer_one_node)
+                 : fold_convert (*type, integer_one_node));
+      /* Adding +1 and using LT_EXPR helps with loop latches that have a
+        loop iteration count of "PARAMETER - 1".  For PARAMETER == 0 this
+        becomes 2^k-1 due to integer overflow, and the condition lb <= ub
+        is true, even if we do not want this.  However lb < ub + 1 is false,
+        as expected.  */
+      tree ub_one = fold_build2 (POINTER_TYPE_P (*type) ? POINTER_PLUS_EXPR
+                                : PLUS_EXPR, *type, *ub, one);
+
+      cond_expr = fold_build2 (LT_EXPR, boolean_type_node, *lb, ub_one);
+    }
+
+  exit_edge = create_empty_if_region_on_edge (entry_edge, cond_expr);
+
+  return exit_edge;
+}
+
+/* Translates a isl_ast_node_for to Gimple. */


an isl_ast_node

Please explain why we do not just generate a loop that has the loop
bound at the top, but instead create a structure of the form

if (lb > ub)
  do {

  } while (lb ..)

(Such a figure, but completed might help).

(I think the original motivation was that later we may be able to prove
that a loop is executed at least once which allows us to remove the if
which again enables better loop invariant code motion)

+
+static edge
+translate_isl_ast_node_for (loop_p context_loop, __isl_keep isl_ast_node *node,
+                           edge next_e, int level, ivs_params_p ip)
+{
+  gcc_assert (isl_ast_node_get_type (node) == isl_ast_node_for);
+  tree type, lb, ub;
+  edge last_e = graphite_create_new_loop_guard (next_e, node, &type,
+                                               &lb, &ub, ip);
+  edge true_e = get_true_edge_from_guard_bb (next_e->dest);
+
+  translate_isl_ast_for_loop (context_loop, node, true_e,
+                             level, type, lb, ub, ip);
+  return last_e;
+}
+
+/* Translates a ISL AST node NODE to GCC representation in the

an ISL AST node

+/* Rename parameters of the schedule_isl according to the SSA form.  */
+
+static __isl_give isl_map *
+add_names_to_schedule_isl (scop_p scop, __isl_take isl_map *schedule_isl,
+                          ast_isl_index_htab_type *params_index)
+{
+  sese region = SCOP_REGION (scop);
+  int i;
+  int nb_parameters = SESE_PARAMS (region).length ();
+  gcc_assert (nb_parameters == (short) isl_map_dim (schedule_isl,
+                                                   isl_dim_param));
+
+  for (i = 0; i < nb_parameters; i++)
+    {
+      tree param = SESE_PARAMS (region)[i];
+      const char *name = get_name (param);
+      int len;
+      char *parameter;
+
+      if (!name)
+       name = "T";
+
+      len = strlen (name);
+      len += 17;
+      parameter = XNEWVEC (char, len + 1);
+      snprintf (parameter, len, "%s_%d", name, SSA_NAME_VERSION (param));
+      save_isl_id_name_index (params_index, parameter, i, i);
+
+      schedule_isl = isl_map_set_dim_name (schedule_isl, isl_dim_param, i,
+                                          parameter);
+      free (parameter);
+    }
+
+  return schedule_isl;
+}
+
+/* Rename parameters of the context according to the SSA form.  */
+
+static __isl_give isl_set *
+add_names_to_context_isl (scop_p scop, __isl_take isl_set *context_isl)
+{
+  sese region = SCOP_REGION (scop);
+  int i;
+  int nb_parameters = SESE_PARAMS (region).length ();
+  gcc_assert (nb_parameters == (short) isl_set_dim (context_isl,
+                                                   isl_dim_param));
+
+  for (i = 0; i < nb_parameters; i++)
+    {
+      tree param = SESE_PARAMS (region)[i];
+      const char *name = get_name (param);
+      int len;
+      char *parameter;
+
+      if (!name)
+       name = "T";
+
+      len = strlen (name);
+      len += 17;
+      parameter = XNEWVEC (char, len + 1);
+      snprintf (parameter, len, "%s_%d", name, SSA_NAME_VERSION (param));
+      context_isl = isl_set_set_dim_name (context_isl, isl_dim_param, i,
+                                         parameter);
+      free (parameter);
+    }
+
+  return context_isl;
+}


Why are the previous two functions necessary?

-static isl_union_map *
+static __isl_give isl_map *


Very good idea. I missed this.

  generate_isl_schedule (scop_p scop)
  {
    int i;
    poly_bb_p pbb;
-  isl_union_map *schedule_isl =
-    isl_union_map_empty (isl_set_get_space (scop->context));
+  isl_map *schedule_isl =
+    isl_map_empty (isl_set_get_space (scop->context));


Why did you switch back to an isl_map?
This seems incorrect for scops with more than two statements.


    FOR_EACH_VEC_ELT (SCOP_BBS (scop), i, pbb)
      {
@@ -96,19 +770,26 @@
        bb_schedule = isl_map_intersect_domain (bb_schedule,
                                              isl_set_copy (pbb->domain));
        schedule_isl =
-        isl_union_map_union (schedule_isl,
-                            isl_union_map_from_map (bb_schedule));
+        isl_map_union (schedule_isl, bb_schedule);


You could simplify this with isl_union_map_add_map(), but this is an
independent patch.

      }
    return schedule_isl;

  }

-static isl_ast_node *
-scop_to_isl_ast (scop_p scop)
+static __isl_give isl_ast_node *
+scop_to_isl_ast (scop_p scop, ast_isl_index_htab_type *params_index)
  {
-  isl_union_map *schedule_isl = generate_isl_schedule (scop);
+  /* Generate loop upper bounds that consist of the current loop iterator,
+  an operator (< or <=) and an expression not involving the iterator.
+  If this option is not set, then the current loop iterator may appear several
+  times in the upper bound. See the isl manual for more details.  */
+  isl_options_set_ast_build_atomic_upper_bound (scop->ctx, true);


Nice comment.

+
+  isl_map *schedule_isl = generate_isl_schedule (scop);
+  schedule_isl = add_names_to_schedule_isl (scop, schedule_isl, params_index);
    isl_ast_build *context_isl = generate_isl_context (scop);
-  isl_ast_node *ast_isl = isl_ast_build_ast_from_schedule (context_isl,
-                                                          schedule_isl);
+  isl_ast_node *ast_isl
+    = isl_ast_build_ast_from_schedule (context_isl,
+                                      isl_union_map_from_map (schedule_isl));
    isl_ast_build_free (context_isl);
    return ast_isl;
  }
@@ -117,21 +798,74 @@
     the given SCOP.  Return true if code generation succeeded.

     FIXME: This is not yet a full implementation of the code generator
-          with ISL ASTs. Generation of GIMPLE code is have to be added.  */
+          with ISL ASTs. Generation of GIMPLE code is have to be completed.  */


code has to be completed.

Cheers,
Tobias

P.S.: I just wanted to let you know that your work is amazing. Almost
fully unsupervised you are always providing high-quality patches! I am
very impressed.

Re: [GSoC] generation of GCC expression trees from isl ast expressions

Reply via email to