Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-05-27 Thread Samuel Antao via cfe-commits
sfantao added a comment.

In http://reviews.llvm.org/D18110#441959, @ABataev wrote:

> Hi Daniel,
>  Will fix it ASAP


Hi Alexey,

Just to make sure: the bug I am talking about is fixed by this patch, that's 
why I didn't abandon this patch, instead I just rebased it. Sorry if my comment 
was misleading.

Thanks,
Samuel


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-05-26 Thread Alexey Bataev via cfe-commits
ABataev added a comment.

Hi Daniel,
Will fix it ASAP

Best regards,

Alexey Bataev
=

Software Engineer
Intel Compiler Team

27.05.2016 1:33, Samuel Antao пишет:

> sfantao updated the summary for this revision.

> 

> http://reviews.llvm.org/D18110



http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-05-26 Thread Alexey Bataev via cfe-commits
ABataev accepted this revision.
ABataev added a comment.
This revision is now accepted and ready to land.

LG


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-05-26 Thread Samuel Antao via cfe-commits
sfantao updated this revision to Diff 58704.
sfantao updated the summary for this revision.
sfantao added a comment.

Remove most of the logic in the first diff. It is no longer necessary given 
that all firstprivate 
captures are now passed by value no matter the directive they are captured in.

Still, there is a small fix related with template function and capture of 
globals.


http://reviews.llvm.org/D18110

Files:
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/target_codegen_global_capture.cpp

Index: test/OpenMP/target_codegen_global_capture.cpp
===
--- test/OpenMP/target_codegen_global_capture.cpp
+++ test/OpenMP/target_codegen_global_capture.cpp
@@ -21,6 +21,11 @@
 // CHECK-DAG: [[BB:@.+]] = internal global float 1.00e+01
 // CHECK-DAG: [[BC:@.+]] = internal global float 1.10e+01
 // CHECK-DAG: [[BD:@.+]] = internal global float 1.20e+01
+// CHECK-DAG: [[TBA:@.+]] = {{.*}}global float 1.70e+01
+// CHECK-DAG: [[TBB:@.+]] = {{.*}}global float 1.80e+01
+// CHECK-DAG: [[TBC:@.+]] = {{.*}}global float 1.90e+01
+// CHECK-DAG: [[TBD:@.+]] = {{.*}}global float 2.00e+01
+
 double Ga = 1.0;
 double Gb = 2.0;
 double Gc = 3.0;
@@ -42,14 +47,14 @@
   static float Sd = 8.0;
 
   // CHECK-DAG:[[VALLB:%.+]] = load i16, i16* [[LB]],
-  // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* @Gb,
-  // CHECK-DAG:[[VALFB:%.+]] = load float, float* @_ZZ3fooE2Sb,
-  // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* @Gc,
+  // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* [[GB]],
+  // CHECK-DAG:[[VALFB:%.+]] = load float, float* [[FB]],
+  // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* [[GC]],
   // CHECK-DAG:[[VALLC:%.+]] = load i16, i16* [[LC]],
-  // CHECK-DAG:[[VALFC:%.+]] = load float, float* @_ZZ3fooE2Sc,
+  // CHECK-DAG:[[VALFC:%.+]] = load float, float* [[FC]],
   // CHECK-DAG:[[VALLD:%.+]] = load i16, i16* [[LD]],
-  // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* @Gd,
-  // CHECK-DAG:[[VALFD:%.+]] = load float, float* @_ZZ3fooE2Sd,
+  // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* [[GD]],
+  // CHECK-DAG:[[VALFD:%.+]] = load float, float* [[FD]],
 
   // 3 local vars being captured.
 
@@ -178,14 +183,156 @@
   #pragma omp parallel
   {
 // CHECK-DAG:[[VALLB:%.+]] = load i16, i16* [[LLB]],
-// CHECK-64-DAG: [[VALGB:%.+]] = load double, double* @Gb,
-// CHECK-DAG:[[VALFB:%.+]] = load float, float* @_ZZ3barE2Sb,
-// CHECK-64-DAG: [[VALGC:%.+]] = load double, double* @Gc,
+// CHECK-64-DAG: [[VALGB:%.+]] = load double, double* [[GB]],
+// CHECK-DAG:[[VALFB:%.+]] = load float, float* [[BB]],
+// CHECK-64-DAG: [[VALGC:%.+]] = load double, double* [[GC]],
+// CHECK-DAG:[[VALLC:%.+]] = load i16, i16* [[LLC]],
+// CHECK-DAG:[[VALFC:%.+]] = load float, float* [[BC]],
+// CHECK-DAG:[[VALLD:%.+]] = load i16, i16* [[LLD]],
+// CHECK-64-DAG: [[VALGD:%.+]] = load double, double* [[GD]],
+// CHECK-DAG:[[VALFD:%.+]] = load float, float* [[BD]],
+
+// 3 local vars being captured.
+
+// CHECK-DAG: store i16 [[VALLB]], i16* [[CONVLB:%.+]],
+// CHECK-DAG: [[CONVLB]] = bitcast i[[sz:64|32]]* [[CADDRLB:%.+]] to i16*
+// CHECK-DAG: [[CVALLB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLB]],
+// CHECK-DAG: [[CPTRLB:%.+]] = inttoptr i[[sz]] [[CVALLB]] to i8*
+// CHECK-DAG: store i8* [[CPTRLB]], i8** [[GEPLB:%.+]],
+// CHECK-DAG: [[GEPLB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+// CHECK-DAG: store i16 [[VALLC]], i16* [[CONVLC:%.+]],
+// CHECK-DAG: [[CONVLC]] = bitcast i[[sz]]* [[CADDRLC:%.+]] to i16*
+// CHECK-DAG: [[CVALLC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLC]],
+// CHECK-DAG: [[CPTRLC:%.+]] = inttoptr i[[sz]] [[CVALLC]] to i8*
+// CHECK-DAG: store i8* [[CPTRLC]], i8** [[GEPLC:%.+]],
+// CHECK-DAG: [[GEPLC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+// CHECK-DAG: store i16 [[VALLD]], i16* [[CONVLD:%.+]],
+// CHECK-DAG: [[CONVLD]] = bitcast i[[sz]]* [[CADDRLD:%.+]] to i16*
+// CHECK-DAG: [[CVALLD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLD]],
+// CHECK-DAG: [[CPTRLD:%.+]] = inttoptr i[[sz]] [[CVALLD]] to i8*
+// CHECK-DAG: store i8* [[CPTRLD]], i8** [[GEPLD:%.+]],
+// CHECK-DAG: [[GEPLD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+// 3 static vars being captured.
+
+// CHECK-DAG: store float [[VALFB]], float* [[CONVFB:%.+]],
+// CHECK-DAG: [[CONVFB]] = bitcast i[[sz]]* [[CADDRFB:%.+]] to float*
+// CHECK-DAG: [[CVALFB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFB]],
+// CHECK-DAG: [[CPTRFB:%.+]] = inttoptr i[[sz]] [[CVALFB]] to i8*
+// CHECK-DAG: store i8* [[CPTRFB]], i8** [[GEPFB:%.+]],
+// CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
+
+

Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-04-05 Thread Alexey Bataev via cfe-commits
I think this code must be removed and we need to start with the basic 
general solution. I missed this part of the code accidentally, but it 
does not mean that I agree with it.

Best regards,
Alexey Bataev
=
Software Engineer
Intel Compiler Team

05.04.2016 0:39, Samuel Antao пишет:
> sfantao added a comment.
>
> I just wanted to add to what Carlo just said, that the feature to capture by 
> value is already used in the offloading upstreamed code. This is not a new 
> feature we are proposing , it is already there used in the code and tested in 
> relevant regression tests. This patch is just about fixing a bug that we 
> identified related to that.
>
> Changing back to capture everything by reference will require 
> reverting/refactor code that is already upstreamed and functional. On top of 
> that, as per the spec and also because of the aforementioned performance 
> issues, we would have to go to capture things by value anyway. So, it looks 
> to me that what you are suggesting is to move backwards, so I'd rather 
> discuss what are the specific concerns with this patch or the by-value 
> captures, so that we can properly address them instead of just delaying the 
> discussion and have to move backwards in the meantime.
>
> Thanks!
> Samuel
>
>
> http://reviews.llvm.org/D18110
>
>
>

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-04-05 Thread Alexey Bataev via cfe-commits
Carlo,
I don't agree with your analysis.
1. This is how it works now. It does not say that the argument must be 
passed by value. Later we may add some generic optimization to optimize 
not only target specific code, but also non-target specific code.
2. In this case such constructs must be gathered in an implicit 
OMPFirstprivateClause to be handled automatically by the existing codegen.
3. It just means this is variable must be gathered in an implicit 
OMPPrivateClause and handled by automatic codegen procedure.

Again, I think you must start with basic implementation. I think if we 
will start from the basic implementation, some problems may be solved 
without any preliminary optimizations.
That's why I insist on non-optimized version of the code as the first step.

Best regards,
Alexey Bataev
=
Software Engineer
Intel Compiler Team

04.04.2016 18:35, Carlo Bertolli пишет:
> carlo.bertolli added a comment.
>
> Hi Alexey
>
> I understand your concerns and share the danger of working on optimizations 
> before a correct implementation is actually in place.
> I do not think this is the case. My comment was very precise about the 
> cause-effect: the OpenMP specification was defined in a certain way, and that 
> is what we should implement, and the consequence of that is an optimized 
> implementation compared to what we had in OpenMP 4.0.
>
> Here are the passages of the OpenMP specifications where I think this is 
> specified (from OpenMP 4.5 specification text):
>
> - Page 197, description of firstprivate: "A list item that appears in a 
> firstprivate clause is subject to the private clause semantics described in 
> Section 2.15.3.3 on page 192, except as noted. In addition,** the new list 
> item is initialized from the original list item existing before the 
> construct**."
>
> - Page 105, restrictions of target: "If an array section is a list item in a 
> map clause and the array section is derived from a variable for which the 
> type is pointer then the data-sharing attribute for that variable in the 
> construct is firstprivate. Prior to the execution of the construct, **the 
> private variable is initialized with the address of the storage location of 
> the corresponding array section in the device data environment**."
>
> - Page 105, following paragraph: "If a zero-length array section is a list 
> item in a map clause, and the array section is derived from a variable for 
> the which the type is pointer then** that variable is initialized with the 
> address of the corresponding storage location in the device data 
> environment**."
>
> In this I read that whenever we have a map with an array section, or a map on 
> a zero-length array section, or a firstprivate on any kind of variable, we 
> need to copy the value of the variable (scalar, pointer) into the 
> corresponding private variable.
>
> Implementing this with references would work indeed, but it would be like 
> implementing pass-by-value using references.
>
> We had long discussion about this with various members of the OpenMP 
> committee on how to implement this correctly. This is why I am insisting on 
> this not being an optimization.
>
> However, this may not apply here (that is why I conditioned by previous 
> comment) or you still think that we should always pass references for any 
> kind of argument to target regions. It is your call, but I wanted to make 
> sure that this is put in the right light.
>
> Thanks!
>
> - Carlo
>
>
> http://reviews.llvm.org/D18110
>
>
>

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-04-04 Thread Samuel Antao via cfe-commits
sfantao added a comment.

I just wanted to add to what Carlo just said, that the feature to capture by 
value is already used in the offloading upstreamed code. This is not a new 
feature we are proposing , it is already there used in the code and tested in 
relevant regression tests. This patch is just about fixing a bug that we 
identified related to that.

Changing back to capture everything by reference will require 
reverting/refactor code that is already upstreamed and functional. On top of 
that, as per the spec and also because of the aforementioned performance 
issues, we would have to go to capture things by value anyway. So, it looks to 
me that what you are suggesting is to move backwards, so I'd rather discuss 
what are the specific concerns with this patch or the by-value captures, so 
that we can properly address them instead of just delaying the discussion and 
have to move backwards in the meantime.

Thanks!
Samuel


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-04-04 Thread Carlo Bertolli via cfe-commits
carlo.bertolli added a comment.

Hi Alexey

I understand your concerns and share the danger of working on optimizations 
before a correct implementation is actually in place.
I do not think this is the case. My comment was very precise about the 
cause-effect: the OpenMP specification was defined in a certain way, and that 
is what we should implement, and the consequence of that is an optimized 
implementation compared to what we had in OpenMP 4.0.

Here are the passages of the OpenMP specifications where I think this is 
specified (from OpenMP 4.5 specification text):

- Page 197, description of firstprivate: "A list item that appears in a 
firstprivate clause is subject to the private clause semantics described in 
Section 2.15.3.3 on page 192, except as noted. In addition,** the new list item 
is initialized from the original list item existing before the construct**."

- Page 105, restrictions of target: "If an array section is a list item in a 
map clause and the array section is derived from a variable for which the type 
is pointer then the data-sharing attribute for that variable in the construct 
is firstprivate. Prior to the execution of the construct, **the private 
variable is initialized with the address of the storage location of the 
corresponding array section in the device data environment**."

- Page 105, following paragraph: "If a zero-length array section is a list item 
in a map clause, and the array section is derived from a variable for the which 
the type is pointer then** that variable is initialized with the address of the 
corresponding storage location in the device data environment**."

In this I read that whenever we have a map with an array section, or a map on a 
zero-length array section, or a firstprivate on any kind of variable, we need 
to copy the value of the variable (scalar, pointer) into the corresponding 
private variable.

Implementing this with references would work indeed, but it would be like 
implementing pass-by-value using references.

We had long discussion about this with various members of the OpenMP committee 
on how to implement this correctly. This is why I am insisting on this not 
being an optimization.

However, this may not apply here (that is why I conditioned by previous 
comment) or you still think that we should always pass references for any kind 
of argument to target regions. It is your call, but I wanted to make sure that 
this is put in the right light.

Thanks!

- Carlo


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-04-03 Thread Alexey Bataev via cfe-commits
Carlo,
Again you're talking about performance loss. I don't think that 
performance loss violates some parsing/semantic/operation rules. If we 
won't pass the argument by value the code will not work properly? OpenMP 
4.5 does not specify anything about passing firstprivate args by value 
or by reference.
I'm not against optimization. But what I'm talking about is: let's start 
with basic support and then, we the basic part of the code is committed 
and the design/architecture of the solution is stable enough, we will 
work on optimizations. Because early optimization does nopt allow to 
simplify design of the whole OpenMP implementation and it leads to the 
code that very hard to modify, to read and to mantain. We already have a 
lot of troubles with Sema part, it is very complex, not very good 
structured. I don't want to have the same troubles with the codegen part 
also.

Best regards,
Alexey Bataev
=
Software Engineer
Intel Compiler Team

01.04.2016 19:10, Carlo Bertolli пишет:
> carlo.bertolli added a comment.
>
> Hi
>
> If I understand correctly the problem, I would like to add something on top 
> of Samuel's comment.
> My understanding is that Alexey is suggesting that we pass a reference type 
> to kernels for every pointer input parameter, regardless of the actual type 
> and data sharing attribute of the related variable. Ignore the remained of 
> this comment if this is not the case.
>
> In my viewpoint, this violates the basic logic for which target firstprivate 
> was introduced in the OpenMP specification: to avoid having to pass to GPU 
> kernels a reference to every pointer variable. In fact, having to pass a 
> reference results in ptxas generating an additional register for each input 
> parameter. We saw significant performance differences in this respect, and 
> reported about this in a paper at SC15. This has been reported by various 
> members of the OpenMP committee multiple times as the reason why the target 
> firstprivate logic is defined the way it is.
>
> In conclusion, I do not see this patch as an optimization, but as a way of 
> correctly implementing what the OpenMP specification clearly state. Any other 
> implementation is wrong, not a simpler, non optimized version.
>
>
> http://reviews.llvm.org/D18110
>
>
>

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-04-03 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/Sema/SemaOpenMP.cpp:816-822
@@ -801,6 +815,9 @@
+
+  // A DSA refers to this captured region if the parent contexts match.
+  auto *ParentContext = RSI->TheCapturedDecl->getParent();
   for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I)
-if (I->CurScope == S)
+if (I->ParentDeclContext == ParentContext)
   return I->Directive;
   return OMPD_unknown;
 }
 

sfantao wrote:
> ABataev wrote:
> > sfantao wrote:
> > > ABataev wrote:
> > > > Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() 
> > > > function. I'm not sure that it is even required. It looks like some 
> > > > optimization hack in the frontend. I'm against any not-necessary 
> > > > optimizations in frontend. If scalar value is a firstprivate, it must 
> > > > be handled in codegen, not by handling it by copy.
> > > 'IsOpenMPCapturedByRef' goal is to change the signature of the outlined 
> > > function. We don't want to have reference types in target region 
> > > arguments unless they are really required. We have seen performance being 
> > > greatly affected just because of that. Of course a consequence of this is 
> > > to have variables that become first private.
> > > 
> > > The current implementation of OpenMP firstprivate doesn't change the the 
> > > function signatures, only the codegen inside the region is affected. So, 
> > > this is not a complete overlap.
> > > 
> > > With this, I am not saying that this cannot be done in codegen. The 
> > > reason I am doing it here is your initial guideline that we should 
> > > attempt to fix most of the things in Sema and avoid complicating codegen 
> > > which is a more sensitive part.
> > > 
> > > Doing this in Codegen would require changing the implementation of 
> > > `GenerateOpenMPCapturedVars` and `GenerateOpenMPCapturedStmtFunction`. We 
> > > wouldn't need the tracking of the context anymore (checking the directive 
> > > would be sufficient), but we would still need to see what't in the map 
> > > clauses.
> > > 
> > > So, do you think we should change this?
> > I think we should not dot it at all for now. Performance is an important 
> > thing, of course, but at first we must have just working solution and only 
> > after that we will think about performance.
> > Early optimization leads to many troubles. The code becomes very hard to 
> > understand. It just increases the time of the development.
> > I suggest to remove all optimizations for now and emit the code as is, 
> > before we have a working solution.
> > Also, this function may lead to incorrect codegen. You don't check that 
> > CapturedRegion is actually an OpenMP region. If we have non-OpenMP cpatured 
> > region between OpenMP regions it will lead to incorrect code.
> I don't agree. As you know, several components on the codegen depend on how 
> things are captured. So in my view, identifying the best possible 
> implementation (including performance-wise) as early as possible is in our 
> best interest to avoid major refactoring down the road.
> 
> All the offloading implementation that we have working today in the trunk is 
> relying on this already. So, by rolling back to capture by reference 
> everything, will require to refactor all the offloading 
> code/patches/regression tests just to have to redo them again in the near 
> future because of the issues we have identified already. 
> 
> I'd rather spend that effort to have the capture by copy to work in a 
> acceptable way and I can move this logic to codegen if you think that is the 
> best way to do it. Note that will require checking the expressions of the map 
> clause in `GenerateOpenMPCapturedVars` and 
> `GenerateOpenMPCapturedStmtFunction`, something that Sema will have to do 
> anyway.
> 
> Let me know your thoughts. 
> 
> 
I don't like this series of patches because they are very intrusive. They add a 
lot of code, that is very hard to read and understand, very hard to refactor. 
That's why I'd prefer to see a simple solution at first and only after that try 
to optimize something.
We had the same problems with the performance of non-offloading part. But the 
very first thing that must be implemented - functionality. Optmization, 
performance etc. is the second step. It is better to add optimization only 
after the basic functionality is implemented.
So, yes, I think you must remove all optimizations for now and start from basic 
implementation.
I don't like the idea of adding new fields. tryCaptureVariable() function has 
variable OpenMPLevel. I think it can be used for proper syncing between 
CapturedRegionInfo and OpenMP region. But I'd prefer to see all these stuff 
only after the main part of codegen for target directive is implemented


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-04-01 Thread Carlo Bertolli via cfe-commits
carlo.bertolli added a comment.

Hi

If I understand correctly the problem, I would like to add something on top of 
Samuel's comment.
My understanding is that Alexey is suggesting that we pass a reference type to 
kernels for every pointer input parameter, regardless of the actual type and 
data sharing attribute of the related variable. Ignore the remained of this 
comment if this is not the case.

In my viewpoint, this violates the basic logic for which target firstprivate 
was introduced in the OpenMP specification: to avoid having to pass to GPU 
kernels a reference to every pointer variable. In fact, having to pass a 
reference results in ptxas generating an additional register for each input 
parameter. We saw significant performance differences in this respect, and 
reported about this in a paper at SC15. This has been reported by various 
members of the OpenMP committee multiple times as the reason why the target 
firstprivate logic is defined the way it is.

In conclusion, I do not see this patch as an optimization, but as a way of 
correctly implementing what the OpenMP specification clearly state. Any other 
implementation is wrong, not a simpler, non optimized version.


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-04-01 Thread Samuel Antao via cfe-commits
sfantao added a comment.

Hi Alexey,



Comment at: lib/Sema/SemaOpenMP.cpp:816-822
@@ -801,6 +815,9 @@
+
+  // A DSA refers to this captured region if the parent contexts match.
+  auto *ParentContext = RSI->TheCapturedDecl->getParent();
   for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I)
-if (I->CurScope == S)
+if (I->ParentDeclContext == ParentContext)
   return I->Directive;
   return OMPD_unknown;
 }
 

ABataev wrote:
> sfantao wrote:
> > ABataev wrote:
> > > Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() 
> > > function. I'm not sure that it is even required. It looks like some 
> > > optimization hack in the frontend. I'm against any not-necessary 
> > > optimizations in frontend. If scalar value is a firstprivate, it must be 
> > > handled in codegen, not by handling it by copy.
> > 'IsOpenMPCapturedByRef' goal is to change the signature of the outlined 
> > function. We don't want to have reference types in target region arguments 
> > unless they are really required. We have seen performance being greatly 
> > affected just because of that. Of course a consequence of this is to have 
> > variables that become first private.
> > 
> > The current implementation of OpenMP firstprivate doesn't change the the 
> > function signatures, only the codegen inside the region is affected. So, 
> > this is not a complete overlap.
> > 
> > With this, I am not saying that this cannot be done in codegen. The reason 
> > I am doing it here is your initial guideline that we should attempt to fix 
> > most of the things in Sema and avoid complicating codegen which is a more 
> > sensitive part.
> > 
> > Doing this in Codegen would require changing the implementation of 
> > `GenerateOpenMPCapturedVars` and `GenerateOpenMPCapturedStmtFunction`. We 
> > wouldn't need the tracking of the context anymore (checking the directive 
> > would be sufficient), but we would still need to see what't in the map 
> > clauses.
> > 
> > So, do you think we should change this?
> I think we should not dot it at all for now. Performance is an important 
> thing, of course, but at first we must have just working solution and only 
> after that we will think about performance.
> Early optimization leads to many troubles. The code becomes very hard to 
> understand. It just increases the time of the development.
> I suggest to remove all optimizations for now and emit the code as is, before 
> we have a working solution.
> Also, this function may lead to incorrect codegen. You don't check that 
> CapturedRegion is actually an OpenMP region. If we have non-OpenMP cpatured 
> region between OpenMP regions it will lead to incorrect code.
I don't agree. As you know, several components on the codegen depend on how 
things are captured. So in my view, identifying the best possible 
implementation (including performance-wise) as early as possible is in our best 
interest to avoid major refactoring down the road.

All the offloading implementation that we have working today in the trunk is 
relying on this already. So, by rolling back to capture by reference 
everything, will require to refactor all the offloading code/patches/regression 
tests just to have to redo them again in the near future because of the issues 
we have identified already. 

I'd rather spend that effort to have the capture by copy to work in a 
acceptable way and I can move this logic to codegen if you think that is the 
best way to do it. Note that will require checking the expressions of the map 
clause in `GenerateOpenMPCapturedVars` and 
`GenerateOpenMPCapturedStmtFunction`, something that Sema will have to do 
anyway.

Let me know your thoughts. 




http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-04-01 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/Sema/SemaOpenMP.cpp:816-822
@@ -801,6 +815,9 @@
+
+  // A DSA refers to this captured region if the parent contexts match.
+  auto *ParentContext = RSI->TheCapturedDecl->getParent();
   for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I)
-if (I->CurScope == S)
+if (I->ParentDeclContext == ParentContext)
   return I->Directive;
   return OMPD_unknown;
 }
 

sfantao wrote:
> ABataev wrote:
> > Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() 
> > function. I'm not sure that it is even required. It looks like some 
> > optimization hack in the frontend. I'm against any not-necessary 
> > optimizations in frontend. If scalar value is a firstprivate, it must be 
> > handled in codegen, not by handling it by copy.
> 'IsOpenMPCapturedByRef' goal is to change the signature of the outlined 
> function. We don't want to have reference types in target region arguments 
> unless they are really required. We have seen performance being greatly 
> affected just because of that. Of course a consequence of this is to have 
> variables that become first private.
> 
> The current implementation of OpenMP firstprivate doesn't change the the 
> function signatures, only the codegen inside the region is affected. So, this 
> is not a complete overlap.
> 
> With this, I am not saying that this cannot be done in codegen. The reason I 
> am doing it here is your initial guideline that we should attempt to fix most 
> of the things in Sema and avoid complicating codegen which is a more 
> sensitive part.
> 
> Doing this in Codegen would require changing the implementation of 
> `GenerateOpenMPCapturedVars` and `GenerateOpenMPCapturedStmtFunction`. We 
> wouldn't need the tracking of the context anymore (checking the directive 
> would be sufficient), but we would still need to see what't in the map 
> clauses.
> 
> So, do you think we should change this?
I think we should not dot it at all for now. Performance is an important thing, 
of course, but at first we must have just working solution and only after that 
we will think about performance.
Early optimization leads to many troubles. The code becomes very hard to 
understand. It just increases the time of the development.
I suggest to remove all optimizations for now and emit the code as is, before 
we have a working solution.
Also, this function may lead to incorrect codegen. You don't check that 
CapturedRegion is actually an OpenMP region. If we have non-OpenMP cpatured 
region between OpenMP regions it will lead to incorrect code.


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-03-30 Thread Samuel Antao via cfe-commits
sfantao added a comment.

Hi Alexey,

Thanks for the review.



Comment at: lib/Sema/SemaOpenMP.cpp:816-822
@@ -801,6 +815,9 @@
+
+  // A DSA refers to this captured region if the parent contexts match.
+  auto *ParentContext = RSI->TheCapturedDecl->getParent();
   for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I)
-if (I->CurScope == S)
+if (I->ParentDeclContext == ParentContext)
   return I->Directive;
   return OMPD_unknown;
 }
 

ABataev wrote:
> Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() 
> function. I'm not sure that it is even required. It looks like some 
> optimization hack in the frontend. I'm against any not-necessary 
> optimizations in frontend. If scalar value is a firstprivate, it must be 
> handled in codegen, not by handling it by copy.
'IsOpenMPCapturedByRef' goal is to change the signature of the outlined 
function. We don't want to have reference types in target region arguments 
unless they are really required. We have seen performance being greatly 
affected just because of that. Of course a consequence of this is to have 
variables that become first private.

The current implementation of OpenMP firstprivate doesn't change the the 
function signatures, only the codegen inside the region is affected. So, this 
is not a complete overlap.

With this, I am not saying that this cannot be done in codegen. The reason I am 
doing it here is your initial guideline that we should attempt to fix most of 
the things in Sema and avoid complicating codegen which is a more sensitive 
part.

Doing this in Codegen would require changing the implementation of 
`GenerateOpenMPCapturedVars` and `GenerateOpenMPCapturedStmtFunction`. We 
wouldn't need the tracking of the context anymore (checking the directive would 
be sufficient), but we would still need to see what't in the map clauses.

So, do you think we should change this?


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-03-29 Thread Alexey Bataev via cfe-commits
ABataev added inline comments.


Comment at: lib/Sema/SemaOpenMP.cpp:816-822
@@ -801,6 +815,9 @@
+
+  // A DSA refers to this captured region if the parent contexts match.
+  auto *ParentContext = RSI->TheCapturedDecl->getParent();
   for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I)
-if (I->CurScope == S)
+if (I->ParentDeclContext == ParentContext)
   return I->Directive;
   return OMPD_unknown;
 }
 

Actually, I think, we need to rework the whole IsOpenMPCapturedByRef() 
function. I'm not sure that it is even required. It looks like some 
optimization hack in the frontend. I'm against any not-necessary optimizations 
in frontend. If scalar value is a firstprivate, it must be handled in codegen, 
not by handling it by copy.


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-03-28 Thread Samuel Antao via cfe-commits
sfantao added a comment.

Ping!


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-03-21 Thread Samuel Antao via cfe-commits
sfantao added a comment.

Ping!


http://reviews.llvm.org/D18110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D18110: [OpenMP] Fix SEMA bug in the capture of global variables in template functions.

2016-03-11 Thread Samuel Antao via cfe-commits
sfantao created this revision.
sfantao added reviewers: ABataev, hfinkel, carlo.bertolli, arpith-jacob, kkwli0.
sfantao added subscribers: fraggamuffin, caomhin, cfe-commits.

Target regions require globals to be captured. The capturing scope has been 
determine by matching the scope of the captured region and the data sharing 
attribute stack (DSA stack). However, if the code is in a templated function, 
the scope is set to null and can no longer be used to  determine scopes.

This patch fixes the issue by matching the SEMA parent contexts (that are 
always defined) of the region scope and DSA scope.

http://reviews.llvm.org/D18110

Files:
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/target_codegen_global_capture.cpp
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===
--- test/OpenMP/teams_codegen.cpp
+++ test/OpenMP/teams_codegen.cpp
@@ -250,12 +250,10 @@
 // CK4:  ret void
 // CK4-NEXT: }
 
-// CK4:  define {{.*}}void @{{[^,]+}}(i8*** dereferenceable({{.}}) [[ARGC1:%.+]])
-// CK4:  [[ARGCADDR1:%.+]] = alloca i8***
-// CK4:  store i8*** [[ARGC1]], i8 [[ARGCADDR1]]
-// CK4:  [[CONV1:%.+]] = load i8***, i8 [[ARGCADDR1]]
-// CK4:  call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* {{.+}} to void (i32*, i32*, ...)*), i8*** [[CONV1]])
-
+// CK4:  define {{.*}}void @{{[^,]+}}(i8** [[ARGC1:%.+]])
+// CK4:  [[ARGCADDR1:%.+]] = alloca i8**
+// CK4:  store i8** [[ARGC1]], i8*** [[ARGCADDR1]]
+// CK4:  call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* {{.+}} to void (i32*, i32*, ...)*), i8*** [[ARGCADDR1]])
 
 #endif // CK4
 
@@ -320,21 +318,22 @@
 // CK5-64:  call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]])
 // CK5-32:  call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGCADDR]])
 
-// CK5:  define {{.*}}void @{{[^,]+}}(i{{.+}} dereferenceable({{.+}}) [[AP:%.+]], i{{.+}} dereferenceable({{.+}}) [[BP:%.+]], i{{.+}} dereferenceable({{.+}}) [[ARGC:%.+]])
+// CK5:  define {{.*}}void @{{[^,]+}}(i{{.+}} [[AP:%.+]], i{{.+}} [[BP:%.+]], i8** [[ARGC:%.+]])
 // CK5:  [[AADDR:%.+]] = alloca i{{.+}}
 // CK5:  [[BADDR:%.+]] = alloca i{{.+}}
-// CK5:  [[ARGCADDR:%.+]] = alloca i{{.+}}
+// CK5:  [[ARGCADDR:%.+]] = alloca i8**
 // CK5:  [[GBL_TH_NUM:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEF_LOC_0]])
 // CK5:  store i{{.+}} [[AP]], i{{.+}}* [[AADDR]]
 // CK5:  store i{{.+}} [[BP]], i{{.+}}* [[BADDR]]
-// CK5:  store i{{.+}} [[ARGC]], i{{.+}}* [[ARGCADDR]]
-// CK5:  [[A_ADDR_VAL:%.+]] = load i32*, i32** [[AADDR]]
-// CK5:  [[B_ADDR_VAL:%.+]] = load i32*, i32** [[BADDR]]
-// CK5:  [[ARGC_ADDR_VAL:%.+]] = load i{{.+}}, i{{.+}}* [[ARGCADDR]]
-// CK5:  [[A_VAL:%.+]] = load i32, i32* [[A_ADDR_VAL]]
-// CK5:  [[B_VAL:%.+]] = load i32, i32* [[B_ADDR_VAL]]
-// CK5:  {{.+}} = call i32 @__kmpc_push_num_teams(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TH_NUM]], i32 [[A_VAL]], i32 [[B_VAL]])
-// CK5:  call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{.+}})* @.omp_outlined.{{.+}} to void (i32*, i32*, ...)*), i{{.+}} [[ARGC_ADDR_VAL]])
+// CK5:  store i8** [[ARGC]], i8*** [[ARGCADDR]]
+// CK5-64:  [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
+// CK5-64:  [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
+// CK5-64:  [[ACONVVAL:%.+]] = load i32, i32* [[ACONV]]
+// CK5-64:  [[BCONVVAL:%.+]] = load i32, i32* [[BCONV]]
+// CK5-32:  [[ACONVVAL:%.+]] = load i32, i32* [[AADDR]]
+// CK5-32:  [[BCONVVAL:%.+]] = load i32, i32* [[BADDR]]
+// CK5:  {{.+}} = call i32 @__kmpc_push_num_teams(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TH_NUM]], i32 [[ACONVVAL]], i32 [[BCONVVAL]])
+// CK5:  call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{.+}})* @.omp_outlined.{{.+}} to void (i32*, i32*, ...)*), i8*** [[ARGCADDR]])
 // CK5:  ret void
 // CK5-NEXT: }
 
Index: test/OpenMP/target_codegen_global_capture.cpp
===
--- test/OpenMP/target_codegen_global_capture.cpp
+++ test/OpenMP/target_codegen_global_capture.cpp
@@ -21,6 +21,11 @@
 // CHECK-DAG: [[BB:@.+]] = internal global float 1.00e+01
 // CHECK-DAG: [[BC:@.+]] = internal global float 1.10e+01
 // CHECK-DAG: [[BD:@.+]] = internal global float 1.20e+01
+/