Author: erichkeane Date: 2025-05-09T14:42:48-07:00 New Revision: c9d8ff081e765bad484deaf0d211e9929319d111
URL: https://github.com/llvm/llvm-project/commit/c9d8ff081e765bad484deaf0d211e9929319d111 DIFF: https://github.com/llvm/llvm-project/commit/c9d8ff081e765bad484deaf0d211e9929319d111.diff LOG: [OpenACC][CIR] Implement 'num_gangs' lowering for combined constructs Another simple one, most of the work is in writing the tests, applies to the compute operation. Added: Modified: clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h clang/test/CIR/CodeGenOpenACC/combined.cpp Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index a03b95a7945db..ebe5b50be4ead 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -339,10 +339,10 @@ class OpenACCClauseCIREmitter final operation.addNumGangsOperands(builder.getContext(), values, lastDeviceTypeValues); + } else if constexpr (isCombinedType<OpTy>) { + applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. Combined constructs remain. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitNumGangsClause"); } } diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index 6e9543c269477..90ba5cbceb800 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -721,4 +721,96 @@ extern "C" void acc_combined(int N, int cond) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc +#pragma acc parallel loop num_gangs(1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel combined(loop) num_gangs({%[[ONE_CAST]] : si32}) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc kernels loop num_gangs(cond) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.kernels combined(loop) num_gangs({%[[CONV_CAST]] : si32}) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc parallel loop num_gangs(1, cond, 2) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel combined(loop) num_gangs({%[[ONE_CAST]] : si32, %[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32}) { + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc kernels loop num_gangs(1) device_type(radeon) num_gangs(cond) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.kernels combined(loop) num_gangs({%[[ONE_CAST]] : si32}, {%[[CONV_CAST]] : si32} [#acc.device_type<radeon>]) { + // CHECK-NEXT: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc parallel loop num_gangs(1, cond, 2) device_type(radeon) num_gangs(4, 5, 6) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i + // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_LITERAL:.*]] = cir.const #cir.int<5> : !s32i + // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[SIX_LITERAL:.*]] = cir.const #cir.int<6> : !s32i + // CHECK-NEXT: %[[SIX_CAST:.*]] = builtin.unrealized_conversion_cast %[[SIX_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel combined(loop) num_gangs({%[[ONE_CAST]] : si32, %[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32}, {%[[FOUR_CAST]] : si32, %[[FIVE_CAST]] : si32, %[[SIX_CAST]] : si32} [#acc.device_type<radeon>]) + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel loop num_gangs(1, cond, 2) device_type(radeon, nvidia) num_gangs(4, 5, 6) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i + // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_LITERAL:.*]] = cir.const #cir.int<5> : !s32i + // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[SIX_LITERAL:.*]] = cir.const #cir.int<6> : !s32i + // CHECK-NEXT: %[[SIX_CAST:.*]] = builtin.unrealized_conversion_cast %[[SIX_LITERAL]] : !s32i to si32 + // CHECK-NEXT: acc.parallel combined(loop) num_gangs({%[[ONE_CAST]] : si32, %[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32}, {%[[FOUR_CAST]] : si32, %[[FIVE_CAST]] : si32, %[[SIX_CAST]] : si32} [#acc.device_type<radeon>], {%[[FOUR_CAST]] : si32, %[[FIVE_CAST]] : si32, %[[SIX_CAST]] : si32} [#acc.device_type<nvidia>]) + // CHECK-NEXT: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits