Issue 71056
Summary [LV] Adjust the pipeline to eliminate unnecessary sext instructions.
Labels new issue
Assignees
Reporter erickq
    I was working on a case recently where I compiled with -march=armv8-a+sve and expected SVE assembly to be generated, but the loop could not be vectorized because the cost model decided it would require too many SCEV checks.


```
/*
 * One entry of a level's block list (level_type.my_blocks and
 * level_type.boundary_condition.blocks point at these).
 *
 * The whole struct is aligned to 64 bytes via the GCC/Clang attribute.
 */
typedef struct {
  int subtype;

  /* residual() adds dim.{i,j,k} to read.{i,j,k} to get its exclusive loop bounds. */
  struct {
    int i;
    int j;
    int k;
  } dim;

  /*
   * read and write are declared with ONE anonymous struct type on purpose:
   * that keeps them mutually assignable (write = read).
   */
  struct {
    int box;      /* residual() uses read.box as an index into level_type.my_boxes */
    int i;
    int j;
    int k;
    int jStride;
    int kStride;
    double *__restrict__ ptr;
  } read, write;
} __attribute__((aligned(64))) blockCopy_type;



/*
 * Per-box record; level_type.my_boxes points at an array of these.
 *
 * residual() reads jStride, kStride, ghosts and vectors from the box it
 * selects; the remaining fields are not used by the code shown in this file.
 */
typedef struct {
  int global_box_id;

  struct {
    int i;
    int j;
    int k;
  } low;

  int dim;
  int ghosts;     /* residual() offsets every vector by ghosts*(1+jStride+kStride) */
  int jStride;    /* multiplied by j when residual() forms its linear index */
  int kStride;    /* multiplied by k when residual() forms its linear index */
  int volume;
  int numVectors;
  double **__restrict__ vectors;  /* indexed by vector id, then by linear cell index */
} box_type;



/*
 * Top-level state handed to residual().
 *
 * Field order is significant (it fixes the struct layout) and is kept exactly
 * as declared originally. Only h, use_offload, num_my_blocks, my_blocks and
 * my_boxes are read by the code visible in this file.
 */
typedef struct {
  double h;                 /* residual() scales by 1.0/(h*h) */
  int active;
  int num_ranks;
  int my_rank;
  int box_dim;
  int box_ghosts;
  int box_jStride;
  int box_kStride;
  int box_volume;
  int numVectors;
  int tag;

  struct {
    int i;
    int j;
    int k;
  } boxes_in;

  struct {
    int i;
    int j;
    int k;
  } dim;

  int *rank_of_box;
  int num_my_boxes;
  box_type *my_boxes;       /* indexed by blockCopy_type.read.box in residual() */

  double **__restrict__ vectors;

  int allocated_blocks;
  int num_my_blocks;        /* trip count of residual()'s outer block loop */
  blockCopy_type *my_blocks;

  struct {
    int type;
    int allocated_blocks[3];
    int num_blocks[3];
    blockCopy_type *blocks[3];
  } boundary_condition;

  double dominant_eigenvalue_of_DinvA;
  int must_subtract_mean;
  double *__restrict__ RedBlack_FP;

  int num_threads;

  int use_offload;          /* when nonzero, residual() performs no work */
  int um_access_policy;
  double *chebyshev_c1;
  double *chebyshev_c2;
  int Krylov_iterations;
  int CAKrylov_formations_of_G;
  int vcycles_from_this_level;
} level_type;

  void residual(level_type * level, int res_id, int x_id, int rhs_id, double a, double b);
/*
 * residual - for every block in level->my_blocks, compute
 *     res[ijk] = rhs[ijk] - Ax
 * over the block's i/j/k range, where Ax is -b*h2inv times a stencil
 * combination of x and the three coefficient vectors beta_i/beta_j/beta_k.
 *
 * level  : supplies the block list, the boxes and the grid spacing h.
 * res_id : index into the box's vectors[] of the output vector.
 * x_id   : index into the box's vectors[] of the input vector x.
 * rhs_id : index into the box's vectors[] of the right-hand side.
 * a      : accepted but not referenced anywhere in this body.
 * b      : scalar multiplier of the stencil term.
 *
 * If level->use_offload is nonzero the function does nothing: the only
 * statement on that path is commented out.
 *
 * NOTE(review): the linear index ijk is a plain int that is used to subscript
 * double arrays with many different offsets; this is presumably where the
 * sign-extension (sext) instructions named in the report title come from.
 */
void residual(level_type * level, int res_id, int x_id, int rhs_id, double a, double b){

  int block;

  if (level->use_offload) {
    // Offload path is empty in this reproducer; the call below is disabled.
//    device_residual(level, res_id, x_id, rhs_id, a, b);
  }
  else {
    // Blocks are distributed across OpenMP threads (static schedule) only when
    // there is more than one block; `block` is private to each thread.
#pragma omp parallel for private(block) if(level->num_my_blocks>1) schedule(static)
 for(block=0;block<level->num_my_blocks;block++){
    // Box index and inclusive lower bounds come from the block's `read` descriptor.
    const int box = level->my_blocks[block].read.box;
    const int ilo = level->my_blocks[block].read.i;
    const int jlo = level->my_blocks[block].read.j;
    const int klo = level->my_blocks[block].read.k;
    // Exclusive upper bounds: lower bound plus the block's dim in each direction.
    const int ihi = level->my_blocks[block].dim.i + ilo;
    const int jhi = level->my_blocks[block].dim.j + jlo;
    const int khi = level->my_blocks[block].dim.k + klo;
    int i,j,k;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int ghosts = level->my_boxes[box].ghosts;
    const double h2inv = 1.0/(level->h*level->h);
    // Every vector pointer is advanced by ghosts*(1+jStride+kStride) elements so
    // that index 0 sits `ghosts` cells in from the start along i, j and k.
    const double * __restrict__ x = level->my_boxes[box].vectors[ x_id] + ghosts*(1+jStride+kStride);
 const double * __restrict__ rhs = level->my_boxes[box].vectors[ rhs_id] + ghosts*(1+jStride+kStride);

    // The coefficient vectors are taken from hard-coded slots 6, 7 and 8.
    const double * __restrict__ beta_i = level->my_boxes[box].vectors[6] + ghosts*(1+jStride+kStride);
    const double * __restrict__ beta_j = level->my_boxes[box].vectors[7] + ghosts*(1+jStride+kStride);
    const double * __restrict__ beta_k = level->my_boxes[box].vectors[8] + ghosts*(1+jStride+kStride);

 double * __restrict__ res = level->my_boxes[box].vectors[ res_id] + ghosts*(1+jStride+kStride);

    for(k=klo;k<khi;k++){
 for(j=jlo;j<jhi;j++){

    // Innermost loop walks i with unit stride in ijk.
    for(i=ilo;i<ihi;i++){

      // 32-bit linear index; all array subscripts below are int arithmetic on it.
      int ijk = i + j*jStride + k*kStride;
      // Ax = -b*h2inv*( c*S1 + 0.25*c*S2 ) with c = 0.0833333333333333333 (about 1/12).
      //   S1: six terms beta_d[..]*(15*(neighbor-center) - (far difference)); x is read
      //       at offsets up to +/-2 along i and up to +/-2*jStride, +/-2*kStride.
      //   S2: twelve terms, each a difference of two beta values times a four-point
      //       combination of x at mixed offsets (+/-1, +/-jStride, +/-kStride).
      // The expression is kept on one line exactly as in the reproducer.
      double Ax = ( -b*h2inv*( ( 0.0833333333333333333)*( + beta_i[ijk ]*( 15.0*(x[ijk-1 ]-x[ijk]) - (x[ijk-2 ]-x[ijk+1 ]) ) + beta_i[ijk+1 ]*( 15.0*(x[ijk+1 ]-x[ijk]) - (x[ijk+2 ]-x[ijk-1 ]) ) + beta_j[ijk ]*( 15.0*(x[ijk-jStride]-x[ijk]) - (x[ijk-2*jStride]-x[ijk+jStride]) ) + beta_j[ijk+jStride]*( 15.0*(x[ijk+jStride]-x[ijk]) - (x[ijk+2*jStride]-x[ijk-jStride]) ) + beta_k[ijk ]*( 15.0*(x[ijk-kStride]-x[ijk]) - (x[ijk-2*kStride]-x[ijk+kStride]) ) + beta_k[ijk+kStride]*( 15.0*(x[ijk+kStride]-x[ijk]) - (x[ijk+2*kStride]-x[ijk-kStride]) ) ) + 0.25*( 0.0833333333333333333)*( + (beta_i[ijk +jStride]-beta_i[ijk -jStride]) * (x[ijk-1 +jStride]-x[ijk+jStride]-x[ijk-1 -jStride]+x[ijk-jStride]) + (beta_i[ijk +kStride]-beta_i[ijk -kStride]) * (x[ijk-1 +kStride]-x[ijk+kStride]-x[ijk-1 -kStride]+x[ijk-kStride]) + (beta_j[ijk +1 ]-beta_j[ijk -1 ]) * (x[ijk-jStride+1 ]-x[ijk+1 ]-x[ijk-jStride-1 ]+x[ijk-1 ]) + (beta_j[ijk +kStride]-beta_j[ijk -kStride]) * (x[ijk-jStride+kStride]-x[ijk+kStride]-x[ijk-jStride-kStride]+x[ijk-kStride]) + (beta_k[ijk +1 ]-beta_k[ijk -1 ]) * (x[ijk-kStride+1 ]-x[ijk+1 ]-x[ijk-kStride-1 ]+x[ijk-1 ]) + (beta_k[ijk +jStride]-beta_k[ijk -jStride]) * (x[ijk-kStride+jStride]-x[ijk+jStride]-x[ijk-kStride-jStride]+x[ijk-jStride]) + (beta_i[ijk+1 +jStride]-beta_i[ijk+1 -jStride]) * (x[ijk+1 +jStride]-x[ijk+jStride]-x[ijk+1 -jStride]+x[ijk-jStride]) + (beta_i[ijk+1 +kStride]-beta_i[ijk+1 -kStride]) * (x[ijk+1 +kStride]-x[ijk+kStride]-x[ijk+1 -kStride]+x[ijk-kStride]) + (beta_j[ijk+jStride+1 ]-beta_j[ijk+jStride-1 ]) * (x[ijk+jStride+1 ]-x[ijk+1 ]-x[ijk+jStride-1 ]+x[ijk-1 ]) + (beta_j[ijk+jStride+kStride]-beta_j[ijk+jStride-kStride]) * (x[ijk+jStride+kStride]-x[ijk+kStride]-x[ijk+jStride-kStride]+x[ijk-kStride]) + (beta_k[ijk+kStride+1 ]-beta_k[ijk+kStride-1 ]) * (x[ijk+kStride+1 ]-x[ijk+1 ]-x[ijk+kStride-1 ]+x[ijk-1 ]) + (beta_k[ijk+kStride+jStride]-beta_k[ijk+kStride-jStride]) * (x[ijk+kStride+jStride]-x[ijk+jStride]-x[ijk+kStride-jStride]+x[ijk-jStride]) ) ) 
);
      res[ijk] = rhs[ijk]-Ax;
    }}}
  }
 }
}
```


_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to