[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen updated this revision to Diff 372843. cchen added a comment. Add tests and fix coding style Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 Files: clang/include/clang/AST/OpenMPClause.h clang/lib/AST/OpenMPClause.cpp clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/declare_variant_ast_print.c clang/test/OpenMP/declare_variant_ast_print.cpp clang/test/OpenMP/declare_variant_construct_codegen_1.c clang/test/OpenMP/declare_variant_messages.c Index: clang/test/OpenMP/declare_variant_messages.c === --- clang/test/OpenMP/declare_variant_messages.c +++ clang/test/OpenMP/declare_variant_messages.c @@ -57,6 +57,12 @@ #pragma omp declare variant(foo) match(user = {condition()}) // expected-error {{expected expression}} expected-error {{use of undeclared identifier 'expr'}} expected-error {{expected expression}} expected-note {{the ignored selector spans until here}} int score_and_cond_non_const(); +#pragma omp declare variant(foo) match(construct={teams,parallel,for,simd}) +#pragma omp declare variant(foo) match(construct={target teams}) // expected-error {{expected ')'}} expected-warning {{expected '}' after the context selectors for the context set "construct"; '}' assumed}} expected-note {{to match this '('}} +#pragma omp declare variant(foo) match(construct={parallel for}) // expected-error {{expected ')'}} expected-warning {{expected '}' after the context selectors for the context set "construct"; '}' assumed}} expected-note {{to match this '('}} +#pragma omp declare variant(foo) match(construct={for simd}) // expected-error {{expected ')'}} expected-warning {{expected '}' after the context selectors for the context set "construct"; '}' assumed}} expected-note {{to match this '('}} +int construct(void); + #pragma omp declare variant(foo) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} 
expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} int a; // expected-error {{'#pragma omp declare variant' can only be applied to functions}} Index: clang/test/OpenMP/declare_variant_construct_codegen_1.c === --- /dev/null +++ clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -0,0 +1,334 @@ +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1 + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s 
--implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +#ifdef CK1 + +#define N 100 + +void p_vxv(int
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
jdoerfert added a comment. Great! This should work. Though we need actual tests ;) I also provided you with two minor edits below to make it easier to read, no functional change though. Comment at: clang/lib/Sema/SemaOpenMP.cpp:737-738 +if (ScopeEntry) + std::copy(Traits.begin(), Traits.end(), +std::back_inserter(ConstructTraits)); +else Comment at: clang/lib/Sema/SemaOpenMP.cpp:740 +else + for (auto RI = Traits.rbegin(), RE = Traits.rend(); RI != RE; ++RI) { +llvm::omp::TraitProperty Top = ConstructTraits.pop_back_val(); Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen added inline comments. Comment at: clang/lib/Sema/SemaOpenMP.cpp:746 +} + } + } jdoerfert wrote: > jdoerfert wrote: > > So, SmallVector has a pop_back_val which we could use to verify the > > properties are the ones we expect. However, we would need to first put them > > into a vector and if insert is false we would need to reverse the vector. > > Maybe that's the way to go: > > 1) Where you call `handleConstructTrait` right now, just put the traits in > > a vector. > > 2) Make it `handleConstructTraits` and take a vector of construct traits. > > 3) For insert == true, just append them all. > > 4) For insert == false, reverse the vector and do pop_back_val with an > > assertion that they match the result of vector.pop_back_val(); > > > > Does that make sense? > > > > (Apologies for making you rewrite this twice.) > This is not what I meant and I doubt this works. Let me try to explain with > an example: > > `handleDeclareVariantConstructTrait` for a DSKind `parallel_for` will first > add a `parallel` then a `for` construct trait. > When we leave the `parallel for` we should pop them off our construct trait > stack again, and preferably verify we do see the ones we expect. > So let's assume the stack was like this before > > ``` > [ teams ] > // enter parallel for code generation > [ teams, parallel, for] > ... > // exit parallel for code generation > ``` > at this point we want to pop `for` and then `parallel` from the end. > > If the last one on the stack was not `for` or the second to last was not > `parallel` something went wrong. > Since `handleDeclareVariantConstructTrait` does give us the traits "in > order", thus [parallel, for], we need to do something different to verify > this is the suffix of the stack. > > I proposed to accumulate all traits in `handleDeclareVariantConstructTrait` > rather than passing them to `handleConstructTrait` one by one. 
> So there would be a single call to `handleConstructTrait` at the end with all > the traits, here [parallel, for]. > If we are entering the scope we just append them to the stack: [teams] + > [parallel, for] = [teams, parallel, for] > If we are exiting the scope (Insert == false, or name it ScopeEntry instead), > we reverse the trait set and compare it against what we pop from the stack: > stack = [teams, parallel, for] > trait_set = [parallel, for] (passed to `handleConstructTrait` by > `handleDeclareVariantConstructTrait`) > verification: > for (trait in reverse(trait_set)) { > stack_last = stack.pop_back_val(); > assert(trait == stack_last && "Something left a trait on the stack!"); > } > > Does this make sense? Thanks for your further explanation and patience, it really helps me understand! Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen updated this revision to Diff 372584. cchen added a comment. Fix based on suggestions Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 Files: clang/include/clang/AST/OpenMPClause.h clang/lib/AST/OpenMPClause.cpp clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/declare_variant_construct_codegen_1.c Index: clang/test/OpenMP/declare_variant_construct_codegen_1.c === --- /dev/null +++ clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - 
-fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#define N 100 + +void p_vxv(int *v1, int *v2, int *v3, int n); +void t_vxv(int *v1, int *v2, int *v3, int n); + +#pragma omp declare variant(t_vxv) match(construct={target}) +#pragma omp declare variant(p_vxv) match(construct={parallel}) +void vxv(int *v1, int *v2, int *v3, int n) { +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i]; +} +// CK1: define dso_local void @vxv + +void p_vxv(int *v1, int *v2, int *v3, int n) { +#pragma omp for +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3; +} +// CK1: define dso_local void @p_vxv + +#pragma omp declare target +void t_vxv(int *v1, int *v2, int *v3, int n) { +#pragma distribute simd +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2; +} +#pragma omp end declare target +// CK1: define dso_local void @t_vxv + + +// CK1-LABEL: define {{[^@]+}}@main +int main() { + int v1[N], v2[N], v3[N]; + + // init + for (int i = 0; i < N; i++) { +v1[i] = (i + 1); +v2[i] = -(i + 1); +v3[i] = 0; + } + +#pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N]) + { +vxv(v1, v2, v3, N); + } +// CK1: call void 
@__omp_offloading_[[OFFLOAD:.+]]({{.+}}) + + vxv(v1, v2, v3, N); +// CK1: call void @vxv + +#pragma omp parallel + { +vxv(v1, v2, v3, N); + } +// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void + + return 0; +} + +// CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}}) +// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void + +// CK1: define internal void [[TARGET_REGION]]( +// CK1: call void @t_vxv + +// CK1: define internal void [[PARALLEL_REGION]]( +// CK1: call void @p_vxv + +#endif // HEADER Index: clang/lib/Sema/SemaOpenMP.cpp === ---
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
jdoerfert added inline comments. Comment at: clang/lib/Sema/SemaOpenMP.cpp:746 +} + } + } jdoerfert wrote: > So, SmallVector has a pop_back_val which we could use to verify the properties > are the ones we expect. However, we would need to first put them into a > vector and if insert is false we would need to reverse the vector. > Maybe that's the way to go: > 1) Where you call `handleConstructTrait` right now, just put the traits in a > vector. > 2) Make it `handleConstructTraits` and take a vector of construct traits. > 3) For insert == true, just append them all. > 4) For insert == false, reverse the vector and do pop_back_val with an > assertion that they match the result of vector.pop_back_val(); > > Does that make sense? > > (Apologies for making you rewrite this twice.) This is not what I meant and I doubt this works. Let me try to explain with an example: `handleDeclareVariantConstructTrait` for a DSKind `parallel_for` will first add a `parallel` then a `for` construct trait. When we leave the `parallel for` we should pop them off our construct trait stack again, and preferably verify we do see the ones we expect. So let's assume the stack was like this before ``` [ teams ] // enter parallel for code generation [ teams, parallel, for] ... // exit parallel for code generation ``` at this point we want to pop `for` and then `parallel` from the end. If the last one on the stack was not `for` or the second to last was not `parallel` something went wrong. Since `handleDeclareVariantConstructTrait` does give us the traits "in order", thus [parallel, for], we need to do something different to verify this is the suffix of the stack. I proposed to accumulate all traits in `handleDeclareVariantConstructTrait` rather than passing them to `handleConstructTrait` one by one. So there would be a single call to `handleConstructTrait` at the end with all the traits, here [parallel, for]. 
If we are entering the scope we just append them to the stack: [teams] + [parallel, for] = [teams, parallel, for] If we are exiting the scope (Insert == false, or name it ScopeEntry instead), we reverse the trait set and compare it against what we pop from the stack: stack = [teams, parallel, for] trait_set = [parallel, for] (passed to `handleConstructTrait` by `handleDeclareVariantConstructTrait`) verification: for (trait in reverse(trait_set)) { stack_last = stack.pop_back_val(); assert(trait == stack_last && "Something left a trait on the stack!"); } Does this make sense? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen updated this revision to Diff 372531. cchen added a comment. Remove braces Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 Files: clang/include/clang/AST/OpenMPClause.h clang/lib/AST/OpenMPClause.cpp clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/declare_variant_construct_codegen_1.c Index: clang/test/OpenMP/declare_variant_construct_codegen_1.c === --- /dev/null +++ clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - 
-fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#define N 100 + +void p_vxv(int *v1, int *v2, int *v3, int n); +void t_vxv(int *v1, int *v2, int *v3, int n); + +#pragma omp declare variant(t_vxv) match(construct={target}) +#pragma omp declare variant(p_vxv) match(construct={parallel}) +void vxv(int *v1, int *v2, int *v3, int n) { +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i]; +} +// CK1: define dso_local void @vxv + +void p_vxv(int *v1, int *v2, int *v3, int n) { +#pragma omp for +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3; +} +// CK1: define dso_local void @p_vxv + +#pragma omp declare target +void t_vxv(int *v1, int *v2, int *v3, int n) { +#pragma distribute simd +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2; +} +#pragma omp end declare target +// CK1: define dso_local void @t_vxv + + +// CK1-LABEL: define {{[^@]+}}@main +int main() { + int v1[N], v2[N], v3[N]; + + // init + for (int i = 0; i < N; i++) { +v1[i] = (i + 1); +v2[i] = -(i + 1); +v3[i] = 0; + } + +#pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N]) + { +vxv(v1, v2, v3, N); + } +// CK1: call void 
@__omp_offloading_[[OFFLOAD:.+]]({{.+}}) + + vxv(v1, v2, v3, N); +// CK1: call void @vxv + +#pragma omp parallel + { +vxv(v1, v2, v3, N); + } +// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void + + return 0; +} + +// CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}}) +// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void + +// CK1: define internal void [[TARGET_REGION]]( +// CK1: call void @t_vxv + +// CK1: define internal void [[PARALLEL_REGION]]( +// CK1: call void @p_vxv + +#endif // HEADER Index: clang/lib/Sema/SemaOpenMP.cpp === ---
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen updated this revision to Diff 372529. cchen added a comment. Fix based on feedback (wait for comment about moving ConstructTrait to IRBuilder) Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 Files: clang/include/clang/AST/OpenMPClause.h clang/lib/AST/OpenMPClause.cpp clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/declare_variant_construct_codegen_1.c Index: clang/test/OpenMP/declare_variant_construct_codegen_1.c === --- /dev/null +++ clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp-simd -x c -triple 
x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#define N 100 + +void p_vxv(int *v1, int *v2, int *v3, int n); +void t_vxv(int *v1, int *v2, int *v3, int n); + +#pragma omp declare variant(t_vxv) match(construct={target}) +#pragma omp declare variant(p_vxv) match(construct={parallel}) +void vxv(int *v1, int *v2, int *v3, int n) { +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i]; +} +// CK1: define dso_local void @vxv + +void p_vxv(int *v1, int *v2, int *v3, int n) { +#pragma omp for +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3; +} +// CK1: define dso_local void @p_vxv + +#pragma omp declare target +void t_vxv(int *v1, int *v2, int *v3, int n) { +#pragma distribute simd +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2; +} +#pragma omp end declare target +// CK1: define dso_local void @t_vxv + + +// CK1-LABEL: define {{[^@]+}}@main +int main() { + int v1[N], v2[N], v3[N]; + + // init + for (int i = 0; i < N; i++) { +v1[i] = (i + 1); +v2[i] = -(i + 1); +v3[i] = 0; + } + +#pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N]) + { +vxv(v1, 
v2, v3, N); + } +// CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}}) + + vxv(v1, v2, v3, N); +// CK1: call void @vxv + +#pragma omp parallel + { +vxv(v1, v2, v3, N); + } +// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void + + return 0; +} + +// CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}}) +// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void + +// CK1: define internal void [[TARGET_REGION]]( +// CK1: call void @t_vxv + +// CK1: define internal void [[PARALLEL_REGION]]( +// CK1: call void @p_vxv + +#endif // HEADER Index: clang/lib/Sema/SemaOpenMP.cpp
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
jdoerfert added inline comments. Comment at: clang/lib/AST/OpenMPClause.cpp:2488 +addTrait(Property); + } } Nit: no braces. Comment at: clang/lib/Parse/ParseOpenMP.cpp:2052 +/* CurrentFunctionDecl */ nullptr, +/* ConstructTraits */ ArrayRef()); `{}` might do it but this is fine too. Comment at: clang/lib/Sema/SemaOpenMP.cpp:746 +} + } + } So, SmallVector has a pop_back_val which we could use to verify the properties are the ones we expect. However, we would need to first put them into a vector and if insert is false we would need to reverse the vector. Maybe that's the way to go: 1) Where you call `handleConstructTrait` right now, just put the traits in a vector. 2) Make it `handleConstructTraits` and take a vector of construct traits. 3) For insert == true, just append them all. 4) For insert == false, reverse the vector and do pop_back_val with an assertion that they match the result of vector.pop_back_val(); Does that make sense? (Apologies for making you rewrite this twice.) Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen updated this revision to Diff 372347. cchen added a comment. Rebase and fix based on suggestions Haven't ConstructTrait to IRBuilder Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 Files: clang/include/clang/AST/OpenMPClause.h clang/lib/AST/OpenMPClause.cpp clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/declare_variant_construct_codegen_1.c Index: clang/test/OpenMP/declare_variant_construct_codegen_1.c === --- /dev/null +++ clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp-simd -x c -triple 
x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#define N 100 + +void p_vxv(int *v1, int *v2, int *v3, int n); +void t_vxv(int *v1, int *v2, int *v3, int n); + +#pragma omp declare variant(t_vxv) match(construct={target}) +#pragma omp declare variant(p_vxv) match(construct={parallel}) +void vxv(int *v1, int *v2, int *v3, int n) { +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i]; +} +// CK1: define dso_local void @vxv + +void p_vxv(int *v1, int *v2, int *v3, int n) { +#pragma omp for +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3; +} +// CK1: define dso_local void @p_vxv + +#pragma omp declare target +void t_vxv(int *v1, int *v2, int *v3, int n) { +#pragma distribute simd +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2; +} +#pragma omp end declare target +// CK1: define dso_local void @t_vxv + + +// CK1-LABEL: define {{[^@]+}}@main +int main() { + int v1[N], v2[N], v3[N]; + + // init + for (int i = 0; i < N; i++) { +v1[i] = (i + 1); +v2[i] = -(i + 1); +v3[i] = 0; + } + +#pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N]) + { +vxv(v1, 
v2, v3, N); + } +// CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}}) + + vxv(v1, v2, v3, N); +// CK1: call void @vxv + +#pragma omp parallel + { +vxv(v1, v2, v3, N); + } +// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void + + return 0; +} + +// CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}}) +// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void + +// CK1: define internal void [[TARGET_REGION]]( +// CK1: call void @t_vxv + +// CK1: define internal void [[PARALLEL_REGION]]( +// CK1: call void @p_vxv + +#endif // HEADER Index: clang/lib/Sema/SemaOpenMP.cpp
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen added inline comments. Comment at: clang/lib/Sema/SemaOpenMP.cpp:737 + } + void eraseConstructTrait() { ConstructTraits.clear(); } + cchen wrote: > jdoerfert wrote: > > Modification was made to make it clear what happens but now I think this is > > not needed. > > > > See below first. Maybe the interface should be > > "handleConstructTrait(Property, bool Insert)" and if Insert is false you > > verify it's the last one and delete it. You'd need to traverse them in > > opposite order though to verify, unsure if verification is therefore > > strictly necessary. > For the delete part, I'm not sure how passing `Property` would work since I'm > not deleting all the construct trait properties inside `ActOnOpenMPRegionEnd` > function. I think I've figured out a way to achieve what you suggest. Comment at: clang/lib/Sema/SemaOpenMP.cpp:4491 ArrayRef Clauses) { + DSAStack->eraseConstructTrait(); if (DSAStack->getCurrentDirective() == OMPD_atomic || jdoerfert wrote: > I now think you need to call something like > `addDeclareVariantConstructTrait` again and do pop_back() instead of > push_back() for each trait matching the DKind. Clearing, or erasing a fixed > amount of traits, does not work. So you are saying that instead of removing all the construct properties inside `ActOnOpenMPRegionEnd`, we should delete those construct properties in a more fine-grained way? I'd like to do so and have tried putting the insert/delete logic inside a more specific function such as `ActOnOpenMPParallelDirective`, Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen added a comment. > One thing I wanted to do though is keep track of the constructs in the > OpenMPIRBuilder instead. So move the > > /// Vector of declare variant construct traits. > SmallVector ConstructTraits; > > into OpenMPIRBuilder and add/delete the things there. It's not a conceptual > difference but makes it easier > to opt-in for Flang later. We don't have any OpenMPIRBuilder instance inside `SemaOpenMP` yet, and the OpenMPIRBuilder constructor requires a `Module` parameter, so I'm not sure how to achieve this. Can you give me some suggestions? Comment at: clang/lib/Sema/SemaOpenMP.cpp:737 + } + void eraseConstructTrait() { ConstructTraits.clear(); } + jdoerfert wrote: > Modification was made to make it clear what happens but now I think this is > not needed. > > See below first. Maybe the interface should be > "handleConstructTrait(Property, bool Insert)" and if Insert is false you > verify it's the last one and delete it. You'd need to traverse them in > opposite order though to verify, unsure if verification is therefore strictly > necessary. For the delete part, I'm not sure how passing `Property` would work since I'm not deleting all the construct trait properties inside the `ActOnOpenMPRegionEnd` function. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
jdoerfert added a comment. Cool! We certainly need more tests but the design you came up with is certainly nicer than what I had in mind. One thing I wanted to do though is keep track of the constructs in the OpenMPIRBuilder instead. So move the /// Vector of declare variant construct traits. SmallVector ConstructTraits; into OpenMPIRBuilder and add/delete the things there. It's not a conceptual difference but makes it easier to opt-in for Flang later. I also left some comments below. I don't think there is anything major though. Comment at: clang/lib/AST/OpenMPClause.cpp:2346 - VMI.ConstructTraits.push_back(Selector.Properties.front().Kind); + // VMI.ConstructTraits.push_back(Selector.Properties.front().Kind); } cchen wrote: > I'm now removing this line since the properties are already added at line > 2334 and not removing this line would lead to an extra iteration in > `OMPContext.cpp` (check out the second diff), and the code will always return > false even if the construct trait properties are added. > > Diff for OpenMPClause.cpp > ``` > 2333 for (const OMPTraitProperty : Selector.Properties) > 2334 VMI.addTrait(Set.Kind, Property.Kind, Property.RawString, > ScorePtr); > 2335 > 2336 if (Set.Kind != TraitSet::construct) > 2337 continue; > 2338 > 2339 // TODO: This might not hold once we implement SIMD properly. > 2340 assert(Selector.Properties.size() == 1 && > 2341 Selector.Properties.front().Kind == > 2342 getOpenMPContextTraitPropertyForSelector( > 2343 Selector.Kind) && > 2344 "Ill-formed construct selector!"); > 2345 > 2346 // VMI.ConstructTraits.push_back(Selector.Properties.front().Kind); > ``` > > Diff for `lib/Frontend/OpenMP/OMPContext.cpp`: > ``` > 224 unsigned ConstructIdx = 0, NoConstructTraits = > Ctx.ConstructTraits.size(); > 225 for (TraitProperty Property : VMI.ConstructTraits) { > 226 assert(getOpenMPContextTraitSetForProperty(Property) == > 227 TraitSet::construct && > 228 "Variant context is ill-formed!"); > 229 > 230 // Verify the nesting. 
> 231 bool FoundInOrder = false; > 232 while (!FoundInOrder && ConstructIdx != NoConstructTraits) > 233 FoundInOrder = (Ctx.ConstructTraits[ConstructIdx++] == Property); > 234 if (ConstructMatches) > 235 ConstructMatches->push_back(ConstructIdx - 1); > 236 > 237 Optional Result = HandleTrait(Property, FoundInOrder); > 238 if (Result.hasValue()) > 239 return Result.getValue(); > 240 > 241 if (!FoundInOrder) { > 242 LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Construct property " > 243 << getOpenMPContextTraitPropertyName(Property, > "") > 244 << " was not nested properly.\n"); > 245 return false; > 246 } > 247 > 248 // TODO: Verify SIMD > 249 } > ``` I'm not surprised some things need adjustment. I never tested construct trait matching much as we did not expose it via clang. Just delete the line if you think it's wrong. Also consider deleting the code before if it's not needed. Comment at: clang/lib/Sema/SemaOpenMP.cpp:737 + } + void eraseConstructTrait() { ConstructTraits.clear(); } + Modification was made to make it clear what happens but now I think this is not needed. See below first. Maybe the interface should be "handleConstructTrait(Property, bool Insert)" and if Insert is false you verify it's the last one and delete it. You'd need to traverse them in opposite order though to verify, unsure if verification is therefore strictly necessary. Comment at: clang/lib/Sema/SemaOpenMP.cpp:3894-3896 + if (isOpenMPWorksharingDirective(DKind)) { +Stack->addConstructTrait(llvm::omp::TraitProperty::construct_for_for); + } Comment at: clang/lib/Sema/SemaOpenMP.cpp:4491 ArrayRef Clauses) { + DSAStack->eraseConstructTrait(); if (DSAStack->getCurrentDirective() == OMPD_atomic || I now think you need to call something like `addDeclareVariantConstructTrait` again and do pop_back() instead of push_back() for each trait matching the DKind. Clearing, or erasing a fixed number of traits, does not work.
Comment at: clang/lib/Sema/SemaOpenMP.cpp:6838 + for (auto Property : DSAStack->getConstructTraits()) +OMPCtx.addTrait(Property); Maybe we should pass the traits to the constructor, makes it clearer they have to be passed. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 ___ cfe-commits mailing list cfe-commits@lists.llvm.org
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen added inline comments. Comment at: clang/test/OpenMP/declare_variant_construct_codegen_1.c:23 +#define N 100 + +void p_vxv(int *v1, int *v2, int *v3, int n); This test is from the SOLLVE test suite: https://github.com/SOLLVE/sollve_vv/blob/master/tests/5.0/declare_variant/test_declare_variant.c Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen added inline comments. Comment at: clang/lib/AST/OpenMPClause.cpp:2346 - VMI.ConstructTraits.push_back(Selector.Properties.front().Kind); + // VMI.ConstructTraits.push_back(Selector.Properties.front().Kind); } I'm now removing this line since the properties are already added at line 2334 and not removing this line would lead to an extra iteration in `OMPContext.cpp` (check out the second diff), and the code will always return false even if the construct trait properties are added. Diff for OpenMPClause.cpp ``` 2333 for (const OMPTraitProperty : Selector.Properties) 2334 VMI.addTrait(Set.Kind, Property.Kind, Property.RawString, ScorePtr); 2335 2336 if (Set.Kind != TraitSet::construct) 2337 continue; 2338 2339 // TODO: This might not hold once we implement SIMD properly. 2340 assert(Selector.Properties.size() == 1 && 2341 Selector.Properties.front().Kind == 2342 getOpenMPContextTraitPropertyForSelector( 2343 Selector.Kind) && 2344 "Ill-formed construct selector!"); 2345 2346 // VMI.ConstructTraits.push_back(Selector.Properties.front().Kind); ``` Diff for `lib/Frontend/OpenMP/OMPContext.cpp`: ``` 224 unsigned ConstructIdx = 0, NoConstructTraits = Ctx.ConstructTraits.size(); 225 for (TraitProperty Property : VMI.ConstructTraits) { 226 assert(getOpenMPContextTraitSetForProperty(Property) == 227 TraitSet::construct && 228 "Variant context is ill-formed!"); 229 230 // Verify the nesting. 
231 bool FoundInOrder = false; 232 while (!FoundInOrder && ConstructIdx != NoConstructTraits) 233 FoundInOrder = (Ctx.ConstructTraits[ConstructIdx++] == Property); 234 if (ConstructMatches) 235 ConstructMatches->push_back(ConstructIdx - 1); 236 237 Optional Result = HandleTrait(Property, FoundInOrder); 238 if (Result.hasValue()) 239 return Result.getValue(); 240 241 if (!FoundInOrder) { 242 LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Construct property " 243 << getOpenMPContextTraitPropertyName(Property, "") 244 << " was not nested properly.\n"); 245 return false; 246 } 247 248 // TODO: Verify SIMD 249 } ``` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D109635/new/ https://reviews.llvm.org/D109635 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D109635: [WIP][OpenMP] Support construct trait set for Clang
cchen created this revision. cchen added a reviewer: jdoerfert. Herald added subscribers: guansong, yaxunl. cchen requested review of this revision. Herald added subscribers: cfe-commits, sstefan1. Herald added a project: clang. This patch supports the construct trait set selector by using the existing declare variant infrastructure inside `OMPContext`; the simd selector is currently not supported. The goal of this patch is to pass the declare variant test inside the SOLLVE test suite. TODO for now: - Add more tests (currently only target and parallel are tested) - I removed one line of code in `clang/lib/AST/OpenMPClause.cpp` and I need to resolve this, since not removing this line would prevent everything in this patch from working (I will leave a comment to ask the question) Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D109635 Files: clang/lib/AST/OpenMPClause.cpp clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/declare_variant_construct_codegen_1.c Index: clang/test/OpenMP/declare_variant_construct_codegen_1.c === --- /dev/null +++ clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: 
%clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#define N 100 + +void p_vxv(int *v1, int *v2, int *v3, int n); +void t_vxv(int *v1, int *v2, int *v3, int n); + +#pragma omp declare variant(t_vxv) match(construct={target}) +#pragma omp declare variant(p_vxv) match(construct={parallel}) +void vxv(int *v1, int *v2, int *v3, int n) { +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i]; +} +// CK1: define dso_local void @vxv + +void p_vxv(int *v1, int *v2, int *v3, int n) { +#pragma omp for +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3; +} +// CK1: define dso_local void @p_vxv + 
+#pragma omp declare target +void t_vxv(int *v1, int *v2, int *v3, int n) { +#pragma distribute simd +for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2; +} +#pragma omp end declare target +// CK1: define dso_local void @t_vxv + + +// CK1-LABEL: define {{[^@]+}}@main +int main() { + int v1[N], v2[N], v3[N]; + + // init + for (int i = 0; i < N; i++) { +v1[i] = (i + 1); +v2[i] = -(i + 1); +v3[i] = 0; + } + +#pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N]) + { +vxv(v1, v2, v3, N); + } +// CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}}) + + vxv(v1, v2, v3, N); +// CK1: call void @vxv + +#pragma omp parallel + { +vxv(v1, v2, v3, N); + } +// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void + + return 0; +} + +//