Author: erichkeane Date: 2025-05-09T13:08:54-07:00 New Revision: 73ca11625f40d4bccb9b7219c163fb8b543c4da4
URL: https://github.com/llvm/llvm-project/commit/73ca11625f40d4bccb9b7219c163fb8b543c4da4 DIFF: https://github.com/llvm/llvm-project/commit/73ca11625f40d4bccb9b7219c163fb8b543c4da4.diff LOG: [OpenACC][CIR] Implement 'wait' clause lowering for combined constructs Added: Modified: clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h clang/test/CIR/CodeGenOpenACC/combined.cpp Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index 1661e3d0c4b62..a03b95a7945db 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -360,9 +360,11 @@ class OpenACCClauseCIREmitter final operation.addWaitOperands(builder.getContext(), clause.hasDevNumExpr(), values, lastDeviceTypeValues); } + } else if constexpr (isCombinedType<OpTy>) { + applyToComputeOp(clause); } else { // TODO: When we've implemented this for everything, switch this to an - // unreachable. Enter data, exit data, update, Combined constructs remain. + // unreachable. Enter data, exit data, update constructs remain. return clauseNotImplemented(clause); } } diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index d79f964f19ebf..6e9543c269477 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -1,9 +1,11 @@ // RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s -extern "C" void acc_combined(int N) { - // CHECK: cir.func @acc_combined(%[[ARG_N:.*]]: !s32i loc{{.*}}) { +extern "C" void acc_combined(int N, int cond) { + // CHECK: cir.func @acc_combined(%[[ARG_N:.*]]: !s32i loc{{.*}}, %[[ARG_COND:.*]]: !s32i loc{{.*}}) { // CHECK-NEXT: %[[ALLOCA_N:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["N", init] + // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["cond", init] // CHECK-NEXT: cir.store %[[ARG_N]], %[[ALLOCA_N]] : !s32i, !cir.ptr<!s32i> + // CHECK-NEXT: cir.store %[[ARG_COND]], %[[COND]] : !s32i, !cir.ptr<!s32i> #pragma acc parallel loop for(unsigned I = 0; I < N; ++I); @@ -573,4 +575,150 @@ extern "C" void acc_combined(int N) { // CHECK-NEXT: } loc // CHECK: acc.terminator // CHECK-NEXT: } loc + +#pragma acc parallel loop wait + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: acc.parallel combined(loop) wait { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc serial loop wait device_type(nvidia) wait + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: acc.serial combined(loop) wait([#acc.device_type<none>, #acc.device_type<nvidia>]) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc kernels loop wait(1) device_type(nvidia) wait + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.kernels combined(loop) wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : si32}) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc parallel loop wait device_type(nvidia) wait(1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel combined(loop) wait([#acc.device_type<none>], {%[[ONE_CAST]] : si32} [#acc.device_type<nvidia>]) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc serial loop wait(1) device_type(nvidia) wait(1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32 + // CHECK-NEXT: acc.serial combined(loop) wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc kernels loop wait(devnum: cond : 1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.kernels combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc parallel loop wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc serial loop wait(devnum: cond : 1, 2) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.serial combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc kernels loop wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.kernels combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<radeon>]) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc parallel loop wait(cond, 1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel combined(loop) wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc serial loop wait(queues: cond, 1) device_type(radeon) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.serial combined(loop) wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { + // CHECK-NEXT: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits