================
@@ -2591,8 +2597,49 @@ void OpenMPIRBuilder::emitReductionListCopy(
// Now that all active lanes have read the element in the
// Reduce list, shuffle over the value from the remote lane.
if (ShuffleInElement) {
- shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr,
RI.ElementType,
- RemoteLaneOffset, ReductionArrayTy);
+ Type *ShuffleType = RI.ElementType;
+ Value *ShuffleSrcAddr = SrcElementAddr;
+ Value *ShuffleDestAddr = DestElementAddr;
+ Value *Zero = ConstantInt::get(Builder.getInt32Ty(), 0);
+ AllocaInst *LocalStorage = nullptr;
+
+ if (IsByRefElem) {
+ assert(RI.ByRefElementType && "Expected by-ref element type to be
set");
+ assert(RI.ByRefAllocatedType &&
+ "Expected by-ref allocated type to be set");
+ // For by-ref reductions, we need to copy from the remote lane the
+ // actual value of the partial reduction computed by that remote lane;
+ // rather than, for example, a pointer to that data or, even worse, a
+ // pointer to the descriptor of the by-ref reduction element.
+ ShuffleType = RI.ByRefElementType;
+
+ ShuffleSrcAddr = Builder.CreateGEP(RI.ByRefAllocatedType,
----------------
tblah wrote:
Yes it would have to understand these new regions. I was just imagining the
region would allow us to keep the codegen for accessing elements of a box
inside of flang. I'm worried that one day some other MLIR user may want to
lower through this (e.g. maybe the efforts to represent the clang AST in MLIR)
and then hit strange bugs from these bits of code that make assumptions about
the use of fortran descriptors.
So something like this might work
```
omp.declare_reduction @add_reduction_byref_box_?xi32 :
!fir.ref<!fir.box<!fir.array<?xi32>>> alloc {
%0 = fir.alloca !fir.box<!fir.array<?xi32>>
omp.yield(%0 : !fir.ref<!fir.box<!fir.array<?xi32>>>)
} init {
^bb0(%arg0: !fir.ref<!fir.box<!fir.array<?xi32>>>, %arg1:
!fir.ref<!fir.box<!fir.array<?xi32>>>):
...
omp.yield(%arg1 : !fir.ref<!fir.box<!fir.array<?xi32>>>)
} combiner {
^bb0(%arg0: !fir.ref<!fir.box<!fir.array<?xi32>>>, %arg1:
!fir.ref<!fir.box<!fir.array<?xi32>>>):
...
omp.yield(%arg0 : !fir.ref<!fir.box<!fir.array<?xi32>>>)
} cleanup {
^bb0(%arg0: !fir.ref<!fir.box<!fir.array<?xi32>>>):
...
omp.yield
} data_size {
^bb0(%arg0: !fir.ref<!fir.box<!fir.array<?xi32>>>):
%c0 = arith.constant 0 : i32
%box = fir.load %arg0
%ele_size = fir.box_elesize %box
%dims:3 = fir.box_dims %box, %c0
%num_elements = ...
%size = arith.muli %ele_size, %num_elements
omp.yield(%size)
} data_addr {
^bb0(%arg0: !fir.ref<!fir.box<!fir.array<?xi32>>>):
%box = fir.load %arg0
%addr = fir.box_addr %box
omp.yield(%addr)
}
```
Then other lowering routes (e.g. via mlir's memref) would be free to populate
these regions differently. I guess there should be some sensible default for
types which aren't passed via a descriptor.
For the Fortran case, it is important we use fir.box_elesize because the size
might not be known at compile time (I think one example of that is polymorphic
types). If it isn't done already, this could easily be canonicalised within
flang to a constant for types which do have a size known at compile time.
Similarly, a constant size should be generated where possible so that MLIR can
fold the whole data_size region into a constant where possible.
https://github.com/llvm/llvm-project/pull/165714
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits