Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
sfantao added a comment. In http://reviews.llvm.org/D18110#441959, @ABataev wrote: > Hi Daniel, > Will fix it ASAP Hi Alexey, Just to make sure: the bug I am talking about is fixed by this patch, that's why I didn't abandon this patch, instead I just rebased it. Sorry if my comment was misleading. Thanks, Samuel http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
ABataev added a comment. Hi Daniel, Will fix it ASAP Best regards, Alexey Bataev = Software Engineer Intel Compiler Team 27.05.2016 1:33, Samuel Antao пишет: > sfantao updated the summary for this revision. > > http://reviews.llvm.org/D18110 http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
ABataev accepted this revision. ABataev added a comment. This revision is now accepted and ready to land. LG http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
sfantao updated this revision to Diff 58704. sfantao updated the summary for this revision. sfantao added a comment. Remove most of the logic in the first diff. It is no longer necessary given that all firstprivate captures are now passed by value no matter the directive they are captured in. Still, there is a small fix related with template function and capture of globals. http://reviews.llvm.org/D18110 Files: lib/Sema/SemaOpenMP.cpp test/OpenMP/target_codegen_global_capture.cpp Index: test/OpenMP/target_codegen_global_capture.cpp === --- test/OpenMP/target_codegen_global_capture.cpp +++ test/OpenMP/target_codegen_global_capture.cpp @@ -21,6 +21,11 @@ // CHECK-DAG: [[BB:@.+]] = internal global float 1.00e+01 // CHECK-DAG: [[BC:@.+]] = internal global float 1.10e+01 // CHECK-DAG: [[BD:@.+]] = internal global float 1.20e+01 +// CHECK-DAG: [[TBA:@.+]] = {{.*}}global float 1.70e+01 +// CHECK-DAG: [[TBB:@.+]] = {{.*}}global float 1.80e+01 +// CHECK-DAG: [[TBC:@.+]] = {{.*}}global float 1.90e+01 +// CHECK-DAG: [[TBD:@.+]] = {{.*}}global float 2.00e+01 + double Ga = 1.0; double Gb = 2.0; double Gc = 3.0; @@ -42,14 +47,14 @@ static float Sd = 8.0; // CHECK-DAG:[[VALLB:%.+]] = load i16, i16* [[LB]], - // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* @Gb, - // CHECK-DAG:[[VALFB:%.+]] = load float, float* @_ZZ3fooE2Sb, - // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* @Gc, + // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* [[GB]], + // CHECK-DAG:[[VALFB:%.+]] = load float, float* [[FB]], + // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* [[GC]], // CHECK-DAG:[[VALLC:%.+]] = load i16, i16* [[LC]], - // CHECK-DAG:[[VALFC:%.+]] = load float, float* @_ZZ3fooE2Sc, + // CHECK-DAG:[[VALFC:%.+]] = load float, float* [[FC]], // CHECK-DAG:[[VALLD:%.+]] = load i16, i16* [[LD]], - // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* @Gd, - // CHECK-DAG:[[VALFD:%.+]] = load float, float* @_ZZ3fooE2Sd, + // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* [[GD]], + // CHECK-DAG:[[VALFD:%.+]] = load float, float* [[FD]], // 3 local vars being captured. @@ -178,14 +183,156 @@ #pragma omp parallel { // CHECK-DAG:[[VALLB:%.+]] = load i16, i16* [[LLB]], -// CHECK-64-DAG: [[VALGB:%.+]] = load double, double* @Gb, -// CHECK-DAG:[[VALFB:%.+]] = load float, float* @_ZZ3barE2Sb, -// CHECK-64-DAG: [[VALGC:%.+]] = load double, double* @Gc, +// CHECK-64-DAG: [[VALGB:%.+]] = load double, double* [[GB]], +// CHECK-DAG:[[VALFB:%.+]] = load float, float* [[BB]], +// CHECK-64-DAG: [[VALGC:%.+]] = load double, double* [[GC]], +// CHECK-DAG:[[VALLC:%.+]] = load i16, i16* [[LLC]], +// CHECK-DAG:[[VALFC:%.+]] = load float, float* [[BC]], +// CHECK-DAG:[[VALLD:%.+]] = load i16, i16* [[LLD]], +// CHECK-64-DAG: [[VALGD:%.+]] = load double, double* [[GD]], +// CHECK-DAG:[[VALFD:%.+]] = load float, float* [[BD]], + +// 3 local vars being captured. + +// CHECK-DAG: store i16 [[VALLB]], i16* [[CONVLB:%.+]], +// CHECK-DAG: [[CONVLB]] = bitcast i[[sz:64|32]]* [[CADDRLB:%.+]] to i16* +// CHECK-DAG: [[CVALLB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLB]], +// CHECK-DAG: [[CPTRLB:%.+]] = inttoptr i[[sz]] [[CVALLB]] to i8* +// CHECK-DAG: store i8* [[CPTRLB]], i8** [[GEPLB:%.+]], +// CHECK-DAG: [[GEPLB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + +// CHECK-DAG: store i16 [[VALLC]], i16* [[CONVLC:%.+]], +// CHECK-DAG: [[CONVLC]] = bitcast i[[sz]]* [[CADDRLC:%.+]] to i16* +// CHECK-DAG: [[CVALLC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLC]], +// CHECK-DAG: [[CPTRLC:%.+]] = inttoptr i[[sz]] [[CVALLC]] to i8* +// CHECK-DAG: store i8* [[CPTRLC]], i8** [[GEPLC:%.+]], +// CHECK-DAG: [[GEPLC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + +// CHECK-DAG: store i16 [[VALLD]], i16* [[CONVLD:%.+]], +// CHECK-DAG: [[CONVLD]] = bitcast i[[sz]]* [[CADDRLD:%.+]] to i16* +// CHECK-DAG: [[CVALLD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLD]], +// CHECK-DAG: [[CPTRLD:%.+]] = inttoptr i[[sz]] [[CVALLD]] to i8* +// CHECK-DAG: store i8* [[CPTRLD]], i8** [[GEPLD:%.+]], +// CHECK-DAG: [[GEPLD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + +// 3 static vars being captured. + +// CHECK-DAG: store float [[VALFB]], float* [[CONVFB:%.+]], +// CHECK-DAG: [[CONVFB]] = bitcast i[[sz]]* [[CADDRFB:%.+]] to float* +// CHECK-DAG: [[CVALFB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFB]], +// CHECK-DAG: [[CPTRFB:%.+]] = inttoptr i[[sz]] [[CVALFB]] to i8* +// CHECK-DAG: store i8* [[CPTRFB]], i8** [[GEPFB:%.+]], +// CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + +
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
I think this code must be removed and we need to start with the basic general solution. I missed this part of the code accidentally, but it does not mean that I agree with it. Best regards, Alexey Bataev = Software Engineer Intel Compiler Team 05.04.2016 0:39, Samuel Antao пишет: > sfantao added a comment. > > I just wanted to add to what Carlo just said, that the feature to capture by > value is already used in the offloading upstreamed code. This is not a new > feature we are proposing , it is already there used in the code and tested in > relevant regression tests. This patch is just about fixing a bug that we > identified related to that. > > Changing back to capture everything by reference will require > reverting/refactor code that is already upstreamed and functional. On top of > that, as per the spec and also because of the aforementioned performance > issues, we would have to go to capture things by value anyway. So, it looks > to me that what you are suggesting is to move backwards, so I'd rather > discuss what are the specific concerns with this patch or the by-value > captures, so that we can properly address them instead of just delaying the > discussion and have to move backwards in the meantime. > > Thanks! > Samuel > > > http://reviews.llvm.org/D18110 > > > ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
Carlo, I don't agree with your analysis. 1. This is how it works now. It does not say that the argument must be passed by value. Later we may add some generic optimization to optimize not only target specific code, but also non-target specific code. 2. In this case such constructs must be gathered in an implicit OMPFirstprivateClause to be handled automatically by the existing codegen. 3. It just means this is variable must be gathered in an implicit OMPPrivateClause and handled by automatic codegen procedure. Again, I think you must start with basic implementation. I think if we will start from the basic implementation, some problems may be solved without any preliminary optimizations. That's why I insist on non-optimized version of the code as the first step. Best regards, Alexey Bataev = Software Engineer Intel Compiler Team 04.04.2016 18:35, Carlo Bertolli пишет: > carlo.bertolli added a comment. > > Hi Alexey > > I understand your concerns and share the danger of working on optimizations > before a correct implementation is actually in place. > I do not think this is the case. My comment was very precise about the > cause-effect: the OpenMP specification was defined in a certain way, and that > is what we should implement, and the consequence of that is an optimized > implementation compared to what we had in OpenMP 4.0. > > Here are the passages of the OpenMP specifications where I think this is > specified (from OpenMP 4.5 specification text): > > - Page 197, description of firstprivate: "A list item that appears in a > firstprivate clause is subject to the private clause semantics described in > Section 2.15.3.3 on page 192, except as noted. In addition,** the new list > item is initialized from the original list item existing before the > construct**." > > - Page 105, restrictions of target: "If an array section is a list item in a > map clause and the array section is derived from a variable for which the > type is pointer then the data-sharing attribute for that variable in the > construct is firstprivate. Prior to the execution of the construct, **the > private variable is initialized with the address of the storage location of > the corresponding array section in the device data environment**." > > - Page 105, following paragraph: "If a zero-length array section is a list > item in a map clause, and the array section is derived from a variable for > the which the type is pointer then** that variable is initialized with the > address of the corresponding storage location in the device data > environment**." > > In this I read that whenever we have a map with an array section, or a map on > a zero-length array section, or a firstprivate on any kind of variable, we > need to copy the value of the variable (scalar, pointer) into the > corresponding private variable. > > Implementing this with references would work indeed, but it would be like > implementing pass-by-value using references. > > We had long discussion about this with various members of the OpenMP > committee on how to implement this correctly. This is why I am insisting on > this not being an optimization. > > However, this may not apply here (that is why I conditioned by previous > comment) or you still think that we should always pass references for any > kind of argument to target regions. It is your call, but I wanted to make > sure that this is put in the right light. > > Thanks! > > - Carlo > > > http://reviews.llvm.org/D18110 > > > ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
sfantao added a comment. I just wanted to add to what Carlo just said, that the feature to capture by value is already used in the offloading upstreamed code. This is not a new feature we are proposing , it is already there used in the code and tested in relevant regression tests. This patch is just about fixing a bug that we identified related to that. Changing back to capture everything by reference will require reverting/refactor code that is already upstreamed and functional. On top of that, as per the spec and also because of the aforementioned performance issues, we would have to go to capture things by value anyway. So, it looks to me that what you are suggesting is to move backwards, so I'd rather discuss what are the specific concerns with this patch or the by-value captures, so that we can properly address them instead of just delaying the discussion and have to move backwards in the meantime. Thanks! Samuel http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
carlo.bertolli added a comment. Hi Alexey I understand your concerns and share the danger of working on optimizations before a correct implementation is actually in place. I do not think this is the case. My comment was very precise about the cause-effect: the OpenMP specification was defined in a certain way, and that is what we should implement, and the consequence of that is an optimized implementation compared to what we had in OpenMP 4.0. Here are the passages of the OpenMP specifications where I think this is specified (from OpenMP 4.5 specification text): - Page 197, description of firstprivate: "A list item that appears in a firstprivate clause is subject to the private clause semantics described in Section 2.15.3.3 on page 192, except as noted. In addition,** the new list item is initialized from the original list item existing before the construct**." - Page 105, restrictions of target: "If an array section is a list item in a map clause and the array section is derived from a variable for which the type is pointer then the data-sharing attribute for that variable in the construct is firstprivate. Prior to the execution of the construct, **the private variable is initialized with the address of the storage location of the corresponding array section in the device data environment**." - Page 105, following paragraph: "If a zero-length array section is a list item in a map clause, and the array section is derived from a variable for the which the type is pointer then** that variable is initialized with the address of the corresponding storage location in the device data environment**." In this I read that whenever we have a map with an array section, or a map on a zero-length array section, or a firstprivate on any kind of variable, we need to copy the value of the variable (scalar, pointer) into the corresponding private variable. Implementing this with references would work indeed, but it would be like implementing pass-by-value using references. We had long discussion about this with various members of the OpenMP committee on how to implement this correctly. This is why I am insisting on this not being an optimization. However, this may not apply here (that is why I conditioned by previous comment) or you still think that we should always pass references for any kind of argument to target regions. It is your call, but I wanted to make sure that this is put in the right light. Thanks! - Carlo http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
Carlo, Again you're talking about performance loss. I don't think that performance loss violates some parsing/semantic/operation rules. If we won't pass the argument by value the code will not work properly? OpenMP 4.5 does not specify anything about passing firstprivate args by value or by reference. I'm not against optimization. But what I'm talking about is: let's start with basic support and then, we the basic part of the code is committed and the design/architecture of the solution is stable enough, we will work on optimizations. Because early optimization does nopt allow to simplify design of the whole OpenMP implementation and it leads to the code that very hard to modify, to read and to mantain. We already have a lot of troubles with Sema part, it is very complex, not very good structured. I don't want to have the same troubles with the codegen part also. Best regards, Alexey Bataev = Software Engineer Intel Compiler Team 01.04.2016 19:10, Carlo Bertolli пишет: > carlo.bertolli added a comment. > > Hi > > If I understand correctly the problem, I would like to add something on top > of Samuel's comment. > My understanding is that Alexey is suggesting that we pass a reference type > to kernels for every pointer input parameter, regardless of the actual type > and data sharing attribute of the related variable. Ignore the remained of > this comment if this is not the case. > > In my viewpoint, this violates the basic logic for which target firstprivate > was introduced in the OpenMP specification: to avoid having to pass to GPU > kernels a reference to every pointer variable. In fact, having to pass a > reference results in ptxas generating an additional register for each input > parameter. We saw significant performance differences in this respect, and > reported about this in a paper at SC15. This has been reported by various > members of the OpenMP committee multiple times as the reason why the target > firstprivate logic is defined the way it is. > > In conclusion, I do not see this patch as an optimization, but as a way of > correctly implementing what the OpenMP specification clearly state. Any other > implementation is wrong, not a simpler, non optimized version. > > > http://reviews.llvm.org/D18110 > > > ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
ABataev added inline comments. Comment at: lib/Sema/SemaOpenMP.cpp:816-822 @@ -801,6 +815,9 @@ + + // A DSA refers to this captured region if the parent contexts match. + auto *ParentContext = RSI->TheCapturedDecl->getParent(); for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I) -if (I->CurScope == S) +if (I->ParentDeclContext == ParentContext) return I->Directive; return OMPD_unknown; } sfantao wrote: > ABataev wrote: > > sfantao wrote: > > > ABataev wrote: > > > > Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() > > > > function. I'm not sure that it is even required. It looks like some > > > > optimization hack in the frontend. I'm against any not-necessary > > > > optimizations in frontend. If scalar value is a firstprivate, it must > > > > be handled in codegen, not by handling it by copy. > > > 'IsOpenMPCapturedByRef' goal is to change the signature of the outlined > > > function. We don't want to have reference types in target region > > > arguments unless they are really required. We have seen performance being > > > greatly affected just because of that. Of course a consequence of this is > > > to have variables that become first private. > > > > > > The current implementation of OpenMP firstprivate doesn't change the the > > > function signatures, only the codegen inside the region is affected. So, > > > this is not a complete overlap. > > > > > > With this, I am not saying that this cannot be done in codegen. The > > > reason I am doing it here is your initial guideline that we should > > > attempt to fix most of the things in Sema and avoid complicating codegen > > > which is a more sensitive part. > > > > > > Doing this in Codegen would require changing the implementation of > > > `GenerateOpenMPCapturedVars` and `GenerateOpenMPCapturedStmtFunction`. We > > > wouldn't need the tracking of the context anymore (checking the directive > > > would be sufficient), but we would still need to see what't in the map > > > clauses. > > > > > > So, do you think we should change this? > > I think we should not dot it at all for now. Performance is an important > > thing, of course, but at first we must have just working solution and only > > after that we will think about performance. > > Early optimization leads to many troubles. The code becomes very hard to > > understand. It just increases the time of the development. > > I suggest to remove all optimizations for now and emit the code as is, > > before we have a working solution. > > Also, this function may lead to incorrect codegen. You don't check that > > CapturedRegion is actually an OpenMP region. If we have non-OpenMP cpatured > > region between OpenMP regions it will lead to incorrect code. > I don't agree. As you know, several components on the codegen depend on how > things are captured. So in my view, identifying the best possible > implementation (including performance-wise) as early as possible is in our > best interest to avoid major refactoring down the road. > > All the offloading implementation that we have working today in the trunk is > relying on this already. So, by rolling back to capture by reference > everything, will require to refactor all the offloading > code/patches/regression tests just to have to redo them again in the near > future because of the issues we have identified already. > > I'd rather spend that effort to have the capture by copy to work in a > acceptable way and I can move this logic to codegen if you think that is the > best way to do it. Note that will require checking the expressions of the map > clause in `GenerateOpenMPCapturedVars` and > `GenerateOpenMPCapturedStmtFunction`, something that Sema will have to do > anyway. > > Let me know your thoughts. > > I don't like this series of patches because they are very intrusive. They add a lot of code, that is very hard to read and understand, very hard to refactor. That's why I'd prefer to see a simple solution at first and only after that try to optimize something. We had the same problems with the performance of non-offloading part. But the very first thing that must be implemented - functionality. Optmization, performance etc. is the second step. It is better to add optimization only after the basic functionality is implemented. So, yes, I think you must remove all optimizations for now and start from basic implementation. I don't like the idea of adding new fields. tryCaptureVariable() function has variable OpenMPLevel. I think it can be used for proper syncing between CapturedRegionInfo and OpenMP region. But I'd prefer to see all these stuff only after the main part of codegen for target directive is implemented http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
carlo.bertolli added a comment. Hi If I understand correctly the problem, I would like to add something on top of Samuel's comment. My understanding is that Alexey is suggesting that we pass a reference type to kernels for every pointer input parameter, regardless of the actual type and data sharing attribute of the related variable. Ignore the remained of this comment if this is not the case. In my viewpoint, this violates the basic logic for which target firstprivate was introduced in the OpenMP specification: to avoid having to pass to GPU kernels a reference to every pointer variable. In fact, having to pass a reference results in ptxas generating an additional register for each input parameter. We saw significant performance differences in this respect, and reported about this in a paper at SC15. This has been reported by various members of the OpenMP committee multiple times as the reason why the target firstprivate logic is defined the way it is. In conclusion, I do not see this patch as an optimization, but as a way of correctly implementing what the OpenMP specification clearly state. Any other implementation is wrong, not a simpler, non optimized version. http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
sfantao added a comment. Hi Alexey, Comment at: lib/Sema/SemaOpenMP.cpp:816-822 @@ -801,6 +815,9 @@ + + // A DSA refers to this captured region if the parent contexts match. + auto *ParentContext = RSI->TheCapturedDecl->getParent(); for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I) -if (I->CurScope == S) +if (I->ParentDeclContext == ParentContext) return I->Directive; return OMPD_unknown; } ABataev wrote: > sfantao wrote: > > ABataev wrote: > > > Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() > > > function. I'm not sure that it is even required. It looks like some > > > optimization hack in the frontend. I'm against any not-necessary > > > optimizations in frontend. If scalar value is a firstprivate, it must be > > > handled in codegen, not by handling it by copy. > > 'IsOpenMPCapturedByRef' goal is to change the signature of the outlined > > function. We don't want to have reference types in target region arguments > > unless they are really required. We have seen performance being greatly > > affected just because of that. Of course a consequence of this is to have > > variables that become first private. > > > > The current implementation of OpenMP firstprivate doesn't change the the > > function signatures, only the codegen inside the region is affected. So, > > this is not a complete overlap. > > > > With this, I am not saying that this cannot be done in codegen. The reason > > I am doing it here is your initial guideline that we should attempt to fix > > most of the things in Sema and avoid complicating codegen which is a more > > sensitive part. > > > > Doing this in Codegen would require changing the implementation of > > `GenerateOpenMPCapturedVars` and `GenerateOpenMPCapturedStmtFunction`. We > > wouldn't need the tracking of the context anymore (checking the directive > > would be sufficient), but we would still need to see what't in the map > > clauses. > > > > So, do you think we should change this? > I think we should not dot it at all for now. Performance is an important > thing, of course, but at first we must have just working solution and only > after that we will think about performance. > Early optimization leads to many troubles. The code becomes very hard to > understand. It just increases the time of the development. > I suggest to remove all optimizations for now and emit the code as is, before > we have a working solution. > Also, this function may lead to incorrect codegen. You don't check that > CapturedRegion is actually an OpenMP region. If we have non-OpenMP cpatured > region between OpenMP regions it will lead to incorrect code. I don't agree. As you know, several components on the codegen depend on how things are captured. So in my view, identifying the best possible implementation (including performance-wise) as early as possible is in our best interest to avoid major refactoring down the road. All the offloading implementation that we have working today in the trunk is relying on this already. So, by rolling back to capture by reference everything, will require to refactor all the offloading code/patches/regression tests just to have to redo them again in the near future because of the issues we have identified already. I'd rather spend that effort to have the capture by copy to work in a acceptable way and I can move this logic to codegen if you think that is the best way to do it. Note that will require checking the expressions of the map clause in `GenerateOpenMPCapturedVars` and `GenerateOpenMPCapturedStmtFunction`, something that Sema will have to do anyway. Let me know your thoughts. http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
ABataev added inline comments. Comment at: lib/Sema/SemaOpenMP.cpp:816-822 @@ -801,6 +815,9 @@ + + // A DSA refers to this captured region if the parent contexts match. + auto *ParentContext = RSI->TheCapturedDecl->getParent(); for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I) -if (I->CurScope == S) +if (I->ParentDeclContext == ParentContext) return I->Directive; return OMPD_unknown; } sfantao wrote: > ABataev wrote: > > Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() > > function. I'm not sure that it is even required. It looks like some > > optimization hack in the frontend. I'm against any not-necessary > > optimizations in frontend. If scalar value is a firstprivate, it must be > > handled in codegen, not by handling it by copy. > 'IsOpenMPCapturedByRef' goal is to change the signature of the outlined > function. We don't want to have reference types in target region arguments > unless they are really required. We have seen performance being greatly > affected just because of that. Of course a consequence of this is to have > variables that become first private. > > The current implementation of OpenMP firstprivate doesn't change the the > function signatures, only the codegen inside the region is affected. So, this > is not a complete overlap. > > With this, I am not saying that this cannot be done in codegen. The reason I > am doing it here is your initial guideline that we should attempt to fix most > of the things in Sema and avoid complicating codegen which is a more > sensitive part. > > Doing this in Codegen would require changing the implementation of > `GenerateOpenMPCapturedVars` and `GenerateOpenMPCapturedStmtFunction`. We > wouldn't need the tracking of the context anymore (checking the directive > would be sufficient), but we would still need to see what't in the map > clauses. > > So, do you think we should change this? I think we should not dot it at all for now. Performance is an important thing, of course, but at first we must have just working solution and only after that we will think about performance. Early optimization leads to many troubles. The code becomes very hard to understand. It just increases the time of the development. I suggest to remove all optimizations for now and emit the code as is, before we have a working solution. Also, this function may lead to incorrect codegen. You don't check that CapturedRegion is actually an OpenMP region. If we have non-OpenMP cpatured region between OpenMP regions it will lead to incorrect code. http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
sfantao added a comment. Hi Alexey, Thanks for the review. Comment at: lib/Sema/SemaOpenMP.cpp:816-822 @@ -801,6 +815,9 @@ + + // A DSA refers to this captured region if the parent contexts match. + auto *ParentContext = RSI->TheCapturedDecl->getParent(); for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I) -if (I->CurScope == S) +if (I->ParentDeclContext == ParentContext) return I->Directive; return OMPD_unknown; } ABataev wrote: > Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() > function. I'm not sure that it is even required. It looks like some > optimization hack in the frontend. I'm against any not-necessary > optimizations in frontend. If scalar value is a firstprivate, it must be > handled in codegen, not by handling it by copy. 'IsOpenMPCapturedByRef' goal is to change the signature of the outlined function. We don't want to have reference types in target region arguments unless they are really required. We have seen performance being greatly affected just because of that. Of course a consequence of this is to have variables that become first private. The current implementation of OpenMP firstprivate doesn't change the the function signatures, only the codegen inside the region is affected. So, this is not a complete overlap. With this, I am not saying that this cannot be done in codegen. The reason I am doing it here is your initial guideline that we should attempt to fix most of the things in Sema and avoid complicating codegen which is a more sensitive part. Doing this in Codegen would require changing the implementation of `GenerateOpenMPCapturedVars` and `GenerateOpenMPCapturedStmtFunction`. We wouldn't need the tracking of the context anymore (checking the directive would be sufficient), but we would still need to see what't in the map clauses. So, do you think we should change this? http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
ABataev added inline comments. Comment at: lib/Sema/SemaOpenMP.cpp:816-822 @@ -801,6 +815,9 @@ + + // A DSA refers to this captured region if the parent contexts match. + auto *ParentContext = RSI->TheCapturedDecl->getParent(); for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I) -if (I->CurScope == S) +if (I->ParentDeclContext == ParentContext) return I->Directive; return OMPD_unknown; } Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() function. I'm not sure that it is even required. It looks like some optimization hack in the frontend. I'm against any not-necessary optimizations in frontend. If scalar value is a firstprivate, it must be handled in codegen, not by handling it by copy. http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
sfantao added a comment. Ping! http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
sfantao added a comment. Ping! http://reviews.llvm.org/D18110 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.
sfantao created this revision. sfantao added reviewers: ABataev, hfinkel, carlo.bertolli, arpith-jacob, kkwli0. sfantao added subscribers: fraggamuffin, caomhin, cfe-commits. Target regions require globals to be captured. The capturing scope has been determine by matching the scope of the captured region and the data sharing attribute stack (DSA stack). However, if the code is in a templated function, the scope is set to null and can no longer be used to determine scopes. This patch fixes the issue by matching the SEMA parent contexts (that are always defined) of the region scope and DSA scope. http://reviews.llvm.org/D18110 Files: lib/Sema/SemaOpenMP.cpp test/OpenMP/target_codegen_global_capture.cpp test/OpenMP/teams_codegen.cpp Index: test/OpenMP/teams_codegen.cpp === --- test/OpenMP/teams_codegen.cpp +++ test/OpenMP/teams_codegen.cpp @@ -250,12 +250,10 @@ // CK4: ret void // CK4-NEXT: } -// CK4: define {{.*}}void @{{[^,]+}}(i8*** dereferenceable({{.}}) [[ARGC1:%.+]]) -// CK4: [[ARGCADDR1:%.+]] = alloca i8*** -// CK4: store i8*** [[ARGC1]], i8 [[ARGCADDR1]] -// CK4: [[CONV1:%.+]] = load i8***, i8 [[ARGCADDR1]] -// CK4: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* {{.+}} to void (i32*, i32*, ...)*), i8*** [[CONV1]]) - +// CK4: define {{.*}}void @{{[^,]+}}(i8** [[ARGC1:%.+]]) +// CK4: [[ARGCADDR1:%.+]] = alloca i8** +// CK4: store i8** [[ARGC1]], i8*** [[ARGCADDR1]] +// CK4: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* {{.+}} to void (i32*, i32*, ...)*), i8*** [[ARGCADDR1]]) #endif // CK4 @@ -320,21 +318,22 @@ // CK5-64: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) // CK5-32: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGCADDR]]) -// CK5: define {{.*}}void @{{[^,]+}}(i{{.+}} dereferenceable({{.+}}) [[AP:%.+]], i{{.+}} dereferenceable({{.+}}) [[BP:%.+]], i{{.+}} dereferenceable({{.+}}) [[ARGC:%.+]]) +// CK5: define {{.*}}void @{{[^,]+}}(i{{.+}} [[AP:%.+]], i{{.+}} [[BP:%.+]], i8** [[ARGC:%.+]]) // CK5: [[AADDR:%.+]] = alloca i{{.+}} // CK5: [[BADDR:%.+]] = alloca i{{.+}} -// CK5: [[ARGCADDR:%.+]] = alloca i{{.+}} +// CK5: [[ARGCADDR:%.+]] = alloca i8** // CK5: [[GBL_TH_NUM:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEF_LOC_0]]) // CK5: store i{{.+}} [[AP]], i{{.+}}* [[AADDR]] // CK5: store i{{.+}} [[BP]], i{{.+}}* [[BADDR]] -// CK5: store i{{.+}} [[ARGC]], i{{.+}}* [[ARGCADDR]] -// CK5: [[A_ADDR_VAL:%.+]] = load i32*, i32** [[AADDR]] -// CK5: [[B_ADDR_VAL:%.+]] = load i32*, i32** [[BADDR]] -// CK5: [[ARGC_ADDR_VAL:%.+]] = load i{{.+}}, i{{.+}}* [[ARGCADDR]] -// CK5: [[A_VAL:%.+]] = load i32, i32* [[A_ADDR_VAL]] -// CK5: [[B_VAL:%.+]] = load i32, i32* [[B_ADDR_VAL]] -// CK5: {{.+}} = call i32 @__kmpc_push_num_teams(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TH_NUM]], i32 [[A_VAL]], i32 [[B_VAL]]) -// CK5: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{.+}})* @.omp_outlined.{{.+}} to void (i32*, i32*, ...)*), i{{.+}} [[ARGC_ADDR_VAL]]) +// CK5: store i8** [[ARGC]], i8*** [[ARGCADDR]] +// CK5-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32* +// CK5-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32* +// CK5-64: [[ACONVVAL:%.+]] = load i32, i32* [[ACONV]] +// CK5-64: [[BCONVVAL:%.+]] = load i32, i32* [[BCONV]] +// CK5-32: [[ACONVVAL:%.+]] = load i32, i32* [[AADDR]] +// CK5-32: [[BCONVVAL:%.+]] = load i32, i32* [[BADDR]] +// CK5: {{.+}} = call i32 @__kmpc_push_num_teams(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TH_NUM]], i32 [[ACONVVAL]], i32 [[BCONVVAL]]) +// CK5: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{.+}})* @.omp_outlined.{{.+}} to void (i32*, i32*, ...)*), i8*** [[ARGCADDR]]) // CK5: ret void // CK5-NEXT: } Index: test/OpenMP/target_codegen_global_capture.cpp === --- test/OpenMP/target_codegen_global_capture.cpp +++ test/OpenMP/target_codegen_global_capture.cpp @@ -21,6 +21,11 @@ // CHECK-DAG: [[BB:@.+]] = internal global float 1.00e+01 // CHECK-DAG: [[BC:@.+]] = internal global float 1.10e+01 // CHECK-DAG: [[BD:@.+]] = internal global float 1.20e+01 +/