| Issue |
71056
|
| Summary |
[LV] Adjust the pipeline to eliminate unnecessary sext instructions.
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
erickq
|
I was working on a case recently. When I specified -march=armv8-a+sve, I expected SVE assembly to be generated, but the loop could not be vectorized because the cost model concluded that it would require a lot of SCEV checks.
```
/* Per-block descriptor, over-aligned to 64 bytes. */
typedef struct {
  int subtype;
  /* Block extent; residual() adds these to the read origin to get the
   * exclusive upper loop bounds. */
  struct {
    int i;
    int j;
    int k;
  } dim;
  /* `read` and `write` share one anonymous layout: a box index, an i/j/k
   * origin, two strides and a data pointer. */
  struct {
    int box;
    int i;
    int j;
    int k;
    int jStride;
    int kStride;
    double *__restrict__ ptr;
  } read, write;
} __attribute__((aligned(64))) blockCopy_type;
/* Per-box metadata plus the table of data vectors stored for that box. */
typedef struct {
  int global_box_id;
  struct {
    int i;
    int j;
    int k;
  } low;
  int dim;
  /* residual() offsets every vector base by ghosts*(1+jStride+kStride). */
  int ghosts;
  /* Linear-index multipliers for j and k (ijk = i + j*jStride + k*kStride). */
  int jStride;
  int kStride;
  int volume;
  int numVectors;
  /* Indexed by vector id; each entry points to an array of doubles. */
  double **__restrict__ vectors;
} box_type;
/* State for one level: grid spacing, box/block tables and solver settings. */
typedef struct {
  /* Grid spacing; residual() uses 1/(h*h). */
  double h;
  int active;
  int num_ranks;
  int my_rank;
  int box_dim;
  int box_ghosts;
  int box_jStride;
  int box_kStride;
  int box_volume;
  int numVectors;
  int tag;
  struct {
    int i;
    int j;
    int k;
  } boxes_in;
  struct {
    int i;
    int j;
    int k;
  } dim;
  int *rank_of_box;
  int num_my_boxes;
  /* Indexed by the box id stored in each block's read descriptor. */
  box_type *my_boxes;
  double **__restrict__ vectors;
  int allocated_blocks;
  /* Trip count of the outer block loop in residual(). */
  int num_my_blocks;
  blockCopy_type *my_blocks;
  struct {
    int type;
    int allocated_blocks[3];
    int num_blocks[3];
    blockCopy_type *blocks[3];
  } boundary_condition;
  double dominant_eigenvalue_of_DinvA;
  int must_subtract_mean;
  double *__restrict__ RedBlack_FP;
  int num_threads;
  /* When non-zero, residual() skips the host loop nest. */
  int use_offload;
  int um_access_policy;
  double *chebyshev_c1;
  double *chebyshev_c2;
  int Krylov_iterations;
  int CAKrylov_formations_of_G;
  int vcycles_from_this_level;
} level_type;
// residual: for every block of `level`, computes res = rhs - Ax over the
// block's i/j/k range, where Ax is a stencil expression built from x and the
// beta_i/beta_j/beta_k coefficient vectors, scaled by -b/(h*h).
//
//   level  - supplies h, the block list (my_blocks) and the box list (my_boxes)
//   res_id - index into box.vectors[] of the output vector
//   x_id   - index into box.vectors[] of the input vector x
//   rhs_id - index into box.vectors[] of the right-hand-side vector
//   a      - not referenced anywhere in this function body
//   b      - scalar multiplier applied to the stencil term
//
// When level->use_offload is non-zero nothing is computed here (the device
// call below is commented out).
void residual(level_type * level, int res_id, int x_id, int rhs_id, double a, double b);
void residual(level_type * level, int res_id, int x_id, int rhs_id, double a, double b){
int block;
if (level->use_offload) {
// device_residual(level, res_id, x_id, rhs_id, a, b);
}
else {
// Blocks are distributed statically across OpenMP threads; the parallel
// region is only enabled when there is more than one block.
#pragma omp parallel for private(block) if(level->num_my_blocks>1) schedule(static)
for(block=0;block<level->num_my_blocks;block++){
// Loop bounds: [lo, lo + dim) on each axis, taken from the block's read descriptor.
const int box = level->my_blocks[block].read.box;
const int ilo = level->my_blocks[block].read.i;
const int jlo = level->my_blocks[block].read.j;
const int klo = level->my_blocks[block].read.k;
const int ihi = level->my_blocks[block].dim.i + ilo;
const int jhi = level->my_blocks[block].dim.j + jlo;
const int khi = level->my_blocks[block].dim.k + klo;
int i,j,k;
// Strides and ghost width are 32-bit ints loaded at run time from the box.
const int jStride = level->my_boxes[box].jStride;
const int kStride = level->my_boxes[box].kStride;
const int ghosts = level->my_boxes[box].ghosts;
const double h2inv = 1.0/(level->h*level->h);
// Every base pointer is advanced by ghosts*(1+jStride+kStride) elements.
// The beta coefficient vectors use the fixed vector ids 6, 7 and 8.
const double * __restrict__ x = level->my_boxes[box].vectors[ x_id] + ghosts*(1+jStride+kStride);
const double * __restrict__ rhs = level->my_boxes[box].vectors[ rhs_id] + ghosts*(1+jStride+kStride);
const double * __restrict__ beta_i = level->my_boxes[box].vectors[6] + ghosts*(1+jStride+kStride);
const double * __restrict__ beta_j = level->my_boxes[box].vectors[7] + ghosts*(1+jStride+kStride);
const double * __restrict__ beta_k = level->my_boxes[box].vectors[8] + ghosts*(1+jStride+kStride);
double * __restrict__ res = level->my_boxes[box].vectors[ res_id] + ghosts*(1+jStride+kStride);
for(k=klo;k<khi;k++){
for(j=jlo;j<jhi;j++){
for(i=ilo;i<ihi;i++){
// Linear element index, computed in int arithmetic and then used (with
// +/-1, +/-jStride, +/-kStride and combined offsets) to subscript the
// double pointers.
// NOTE(review): presumably these int subscripts are where the sext
// instructions named in the issue title come from - confirm against the IR.
int ijk = i + j*jStride + k*kStride;
// Ax = -b*h2inv*( (1/12)*(axis-aligned terms) + 0.25*(1/12)*(cross terms) ).
double Ax = ( -b*h2inv*( ( 0.0833333333333333333)*( + beta_i[ijk ]*( 15.0*(x[ijk-1 ]-x[ijk]) - (x[ijk-2 ]-x[ijk+1 ]) ) + beta_i[ijk+1 ]*( 15.0*(x[ijk+1 ]-x[ijk]) - (x[ijk+2 ]-x[ijk-1 ]) ) + beta_j[ijk ]*( 15.0*(x[ijk-jStride]-x[ijk]) - (x[ijk-2*jStride]-x[ijk+jStride]) ) + beta_j[ijk+jStride]*( 15.0*(x[ijk+jStride]-x[ijk]) - (x[ijk+2*jStride]-x[ijk-jStride]) ) + beta_k[ijk ]*( 15.0*(x[ijk-kStride]-x[ijk]) - (x[ijk-2*kStride]-x[ijk+kStride]) ) + beta_k[ijk+kStride]*( 15.0*(x[ijk+kStride]-x[ijk]) - (x[ijk+2*kStride]-x[ijk-kStride]) ) ) + 0.25*( 0.0833333333333333333)*( + (beta_i[ijk +jStride]-beta_i[ijk -jStride]) * (x[ijk-1 +jStride]-x[ijk+jStride]-x[ijk-1 -jStride]+x[ijk-jStride]) + (beta_i[ijk +kStride]-beta_i[ijk -kStride]) * (x[ijk-1 +kStride]-x[ijk+kStride]-x[ijk-1 -kStride]+x[ijk-kStride]) + (beta_j[ijk +1 ]-beta_j[ijk -1 ]) * (x[ijk-jStride+1 ]-x[ijk+1 ]-x[ijk-jStride-1 ]+x[ijk-1 ]) + (beta_j[ijk +kStride]-beta_j[ijk -kStride]) * (x[ijk-jStride+kStride]-x[ijk+kStride]-x[ijk-jStride-kStride]+x[ijk-kStride]) + (beta_k[ijk +1 ]-beta_k[ijk -1 ]) * (x[ijk-kStride+1 ]-x[ijk+1 ]-x[ijk-kStride-1 ]+x[ijk-1 ]) + (beta_k[ijk +jStride]-beta_k[ijk -jStride]) * (x[ijk-kStride+jStride]-x[ijk+jStride]-x[ijk-kStride-jStride]+x[ijk-jStride]) + (beta_i[ijk+1 +jStride]-beta_i[ijk+1 -jStride]) * (x[ijk+1 +jStride]-x[ijk+jStride]-x[ijk+1 -jStride]+x[ijk-jStride]) + (beta_i[ijk+1 +kStride]-beta_i[ijk+1 -kStride]) * (x[ijk+1 +kStride]-x[ijk+kStride]-x[ijk+1 -kStride]+x[ijk-kStride]) + (beta_j[ijk+jStride+1 ]-beta_j[ijk+jStride-1 ]) * (x[ijk+jStride+1 ]-x[ijk+1 ]-x[ijk+jStride-1 ]+x[ijk-1 ]) + (beta_j[ijk+jStride+kStride]-beta_j[ijk+jStride-kStride]) * (x[ijk+jStride+kStride]-x[ijk+kStride]-x[ijk+jStride-kStride]+x[ijk-kStride]) + (beta_k[ijk+kStride+1 ]-beta_k[ijk+kStride-1 ]) * (x[ijk+kStride+1 ]-x[ijk+1 ]-x[ijk+kStride-1 ]+x[ijk-1 ]) + (beta_k[ijk+kStride+jStride]-beta_k[ijk+kStride-jStride]) * (x[ijk+kStride+jStride]-x[ijk+jStride]-x[ijk+kStride-jStride]+x[ijk-jStride]) ) ) );
res[ijk] = rhs[ijk]-Ax;
}}}
}
}
}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs