[PATCH] D28404: IRGen: Add optnone attribute on function during O0
This revision was automatically updated to reflect the committed changes. Closed by commit rL304127: IRGen: Add optnone attribute on function during O0 (authored by mehdi_amini). Changed prior to commit: https://reviews.llvm.org/D28404?vs=83480=100580#toc Repository: rL LLVM https://reviews.llvm.org/D28404 Files: cfe/trunk/include/clang/Driver/CC1Options.td cfe/trunk/include/clang/Frontend/CodeGenOptions.def cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/Frontend/CompilerInvocation.cpp cfe/trunk/test/CodeGen/aarch64-neon-2velem.c cfe/trunk/test/CodeGen/aarch64-neon-3v.c cfe/trunk/test/CodeGen/aarch64-neon-across.c cfe/trunk/test/CodeGen/aarch64-neon-extract.c cfe/trunk/test/CodeGen/aarch64-neon-fcvt-intrinsics.c cfe/trunk/test/CodeGen/aarch64-neon-fma.c cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c cfe/trunk/test/CodeGen/aarch64-neon-ldst-one.c cfe/trunk/test/CodeGen/aarch64-neon-misc.c cfe/trunk/test/CodeGen/aarch64-neon-perm.c cfe/trunk/test/CodeGen/aarch64-neon-scalar-copy.c cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c cfe/trunk/test/CodeGen/aarch64-neon-shifts.c cfe/trunk/test/CodeGen/aarch64-neon-tbl.c cfe/trunk/test/CodeGen/aarch64-neon-vcombine.c cfe/trunk/test/CodeGen/aarch64-neon-vget-hilo.c cfe/trunk/test/CodeGen/aarch64-neon-vget.c cfe/trunk/test/CodeGen/aarch64-poly128.c cfe/trunk/test/CodeGen/aarch64-poly64.c cfe/trunk/test/CodeGen/address-safety-attr-kasan.cpp cfe/trunk/test/CodeGen/address-safety-attr.cpp cfe/trunk/test/CodeGen/arm-crc32.c cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c cfe/trunk/test/CodeGen/arm-neon-fma.c cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c cfe/trunk/test/CodeGen/arm-neon-shifts.c cfe/trunk/test/CodeGen/arm-neon-vcvtX.c cfe/trunk/test/CodeGen/arm-neon-vget.c cfe/trunk/test/CodeGen/arm64-crc32.c cfe/trunk/test/CodeGen/arm64-lanes.c cfe/trunk/test/CodeGen/arm64_vcopy.c cfe/trunk/test/CodeGen/arm64_vdupq_n_f64.c cfe/trunk/test/CodeGen/attr-coldhot.c cfe/trunk/test/CodeGen/attr-naked.c cfe/trunk/test/CodeGen/builtins-arm-exclusive.c cfe/trunk/test/CodeGen/builtins-arm.c cfe/trunk/test/CodeGen/builtins-arm64.c cfe/trunk/test/CodeGen/noduplicate-cxx11-test.cpp cfe/trunk/test/CodeGen/pragma-weak.c cfe/trunk/test/CodeGen/unwind-attr.c cfe/trunk/test/CodeGenCXX/apple-kext-indirect-virtual-dtor-call.cpp cfe/trunk/test/CodeGenCXX/apple-kext-no-staticinit-section.cpp cfe/trunk/test/CodeGenCXX/debug-info-global-ctor-dtor.cpp cfe/trunk/test/CodeGenCXX/optnone-templates.cpp cfe/trunk/test/CodeGenCXX/static-init-wasm.cpp cfe/trunk/test/CodeGenCXX/thunks.cpp cfe/trunk/test/CodeGenObjC/gnu-exceptions.m cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl cfe/trunk/test/Driver/darwin-iphone-defaults.m Index: cfe/trunk/include/clang/Driver/CC1Options.td === --- cfe/trunk/include/clang/Driver/CC1Options.td +++ cfe/trunk/include/clang/Driver/CC1Options.td @@ -172,6 +172,8 @@ def disable_lifetimemarkers : Flag<["-"], "disable-lifetime-markers">, HelpText<"Disable lifetime-markers emission even when optimizations are " "enabled">; +def disable_O0_optnone : Flag<["-"], "disable-O0-optnone">, + HelpText<"Disable adding the optnone attribute to functions at O0">; def disable_red_zone : Flag<["-"], "disable-red-zone">, HelpText<"Do not emit code that uses the red zone.">; def dwarf_column_info : Flag<["-"], "dwarf-column-info">, Index: cfe/trunk/include/clang/Frontend/CodeGenOptions.def === --- cfe/trunk/include/clang/Frontend/CodeGenOptions.def +++ cfe/trunk/include/clang/Frontend/CodeGenOptions.def @@ -53,6 +53,7 @@ ///< the pristine IR generated by the ///< frontend. CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers +CODEGENOPT(DisableO0ImplyOptNone , 1, 0) ///< Don't annonate function with optnone at O0 CODEGENOPT(ExperimentalNewPassManager, 1, 0) ///< Enables the new, experimental ///< pass manager. CODEGENOPT(DisableRedZone, 1, 0) ///< Set when -mno-red-zone is enabled. Index: cfe/trunk/test/CodeGenCXX/optnone-templates.cpp === --- cfe/trunk/test/CodeGenCXX/optnone-templates.cpp +++ cfe/trunk/test/CodeGenCXX/optnone-templates.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -triple %itanium_abi_triple -std=c++11 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple %itanium_abi_triple -std=c++11 -disable-O0-optnone -emit-llvm -o - | FileCheck %s // Test optnone on template instantiations. Index: cfe/trunk/test/CodeGenCXX/debug-info-global-ctor-dtor.cpp
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
dexonsmith accepted this revision. dexonsmith added a comment. This revision is now accepted and ready to land. Actually, looking through the comments, it appears that everyone (eventually) agreed with the approach in the patch. I agree too. LGTM. Mehdi, are you able to rebase and commit, or should someone take over? https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
MatzeB added a comment. FWIW, I think this makes sense. Moving O0 and optnone get closer seems sensible. Even though -O3 with an optnone function indeed gives you different results today. We are basically maintaining two things for the same "do not optimize" goal. This obviously won't make O0 and optnone being the same in todays pass managers, but it is a step in the right direction. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#675687, @chandlerc wrote: > In https://reviews.llvm.org/D28404#675616, @mehdi_amini wrote: > > > We're still waiting for @rsmith to comment whether it'd be better to `have > > a LangOpts flag that basically means "pragma clang optimize off is always > > in effect."` and `Have Sema pretend the pragma is in effect at all times, > > at -O0`. > > > FWIW, I have no real opinion about this side of it, I see it more as a detail > of how Clang wants to implement this kind of thing. That was my suggestion as it seemed like this patch is essentially replicating the attribute-conflict detection logic that's in place for attributes specified in the source. And we do like to say DRY. But I won't insist; the patch can proceed as far as I'm concerned. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
chandlerc added a comment. Just to be explicit, I agree with Hal's summary. This seems like the right engineering tradeoff and I don't find anything particularly unsatisfying about it. In https://reviews.llvm.org/D28404#675616, @mehdi_amini wrote: > Also note that @chandlerc in r290398 made clang adding "noinline" on every > function at O0 by default, which seems very similar to what I'm doing here. > > We're still waiting for @rsmith to comment whether it'd be better to `have a > LangOpts flag that basically means "pragma clang optimize off is always in > effect."` and `Have Sema pretend the pragma is in effect at all times, at > -O0`. FWIW, I have no real opinion about this side of it, I see it more as a detail of how Clang wants to implement this kind of thing. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. Also note that @chandlerc in r290398 made clang adding "noinline" on every function at O0 by default, which seems very similar to what I'm doing here. We're still waiting for @rsmith to comment whether it'd be better to `have a LangOpts flag that basically means "pragma clang optimize off is always in effect."` and `Have Sema pretend the pragma is in effect at all times, at -O0`. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
hfinkel added a comment. In https://reviews.llvm.org/D28404#673260, @mehdi_amini wrote: > Ping :) To clarify my understanding of this thread, it seems like there are three ways forward here: 1. To have -O0 add optnone to the generated functions (enabling some degree of lack of optimization of those functions even when used with -flto) 2. To have -O0 -flto essentially turn off LTO (so that we get unoptimized objects directly for things we're debugging) 3. Add a separate flag to make optnone the default (1) is this patch. The disadvantage of (2) is that it also precludes CFI (and other whole-program transformations). This seems highly unfortunate at best and a non-starter in the general case. The disadvantage of (3) is that it might seems confusing to users (i.e. how to explain the difference between -O0 and -foptimize-off?) and is an unnecessary exposure to users of implementation details. On this point I agree. It is true that -O0 != optnone, in a technical sense, but in the end, both are best effort. Moreover, there is a tradeoff between disabling optimization of the functions you don't want to optimize and keeping the remainder of the code as similar as possible to how it would be if everything were being optimized. What optnone provides seems like a reasonable point in that tradeoff space. I think that we should move forward with this approach. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. Ping :) https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. @rsmith could you say whether it seems reasonable to have a LangOpts flag that basically means "`pragma clang optimize off` is always in effect." I think it would make the other optnone-related logic simpler. It would not be the only sort-of-codegen-related flag in LangOpts (e.g. the PIC/PIE stuff). In https://reviews.llvm.org/D28404#641538, @probinson wrote: > There is another way to make use of the attribute, which I think will be more > robust: > > Have Sema pretend the pragma is in effect at all times, at -O0. Then all the > existing conflict detection/resolution logic Just Works, and there's no need > to spend 4 lines of code hoping to replicate the correct conditions in > CodeGenModule. > > Because Sema does not have a handle on CodeGenOptions and therefore does not > a-priori know the optimization level, probably the right thing to do is move > the flag to LangOpts and set it under the correct conditions in > CompilerInvocation. It wouldn't be the first codegen-like option in LangOpts. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. I guess I'm getting irritated because people are trying to tell me what optnone means. I know what it means; I spent probably a whole year pushing to get it adopted. Optnone means: When you are running optimizations, try not to optimize this part, if you can. That's it. That's *all*. It has never meant anything else. Telling me different means you misunderstand, and trying to persuade me that *I* misunderstand is going to be a waste of time and effort. I fully understand that this is not the definition of optnone that you *want*. Please feel free to propose a redefinition. But don't go telling me that the thing you *want* is what the thing already *is* and that any difference is a bug. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
chandlerc added a comment. In https://reviews.llvm.org/D28404#641696, @mehdi_amini wrote: > In https://reviews.llvm.org/D28404#641632, @probinson wrote: > > > In https://reviews.llvm.org/D28404#641606, @mehdi_amini wrote: > > > > > If we want to support `-O0 -flto` and `optnone` it the way to convey this > > > to the optimizer, I don't see the alternative. > > > > > > optsize != -Os (according to Chandler) > > minsize != -Oz (according to Chandler) > > optnone != -O0 (according to both me and Chandler) > > > Of course, but that's just an implementation limitation, mostly for > historical reason I believe, not by design. There is certainly a lot of history here influencing this, but I think there is also some a fundamental difference. The flag is a request from the user to behave in a particular way. The LLVM attribute is a tool we use in some cases to try to satisfy that request. When we're not doing LTO, it is easier to satisfy the requests of '-O' flags. The fact that we happen to not use attributes to do it today is just an implementation detail. When we are doing LTO, satisfying different requests is hard. We should do our best, but I think it is reasonable to explain to the user that "with LTO, we can't fail to optimize with the Wombat optimization because of when one file requests -O0 and another requests -O2". Same thing for the other levels. This seems precisely analogous to the fact that even when the user requests -O0, we will do some inlining. Why? Because we *have to* for semantic reasons. So I think what Mehdi is driving at is that if '-O0 -flto' has a mismatch from '-O0' in terms of what users expect, we should probably try to fix that. I'd suggest that there may be fundamental things we can't fix and that is OK. I don't think this is unprincipled either, we're doing the best we can to honor the user's request. The other thing that might help is to point out that there *are* principles behind why these flags. Unlike the differences between -O[123], all of -O0, -Os, and -Oz have non-threshold semantic implications. So with this change, I think we will have *all* the -O flags covered, because I view '-O[123]' as a single semantic space with a threshold modifier that we *don't* need to communicate to LTO. We model that state as the absence of any attribute. And -O0, -Os, and-Oz have dedicated attributes. If we ever want to really push on -Og, that might indeed require an attribute to distinguish it. >> optnone is not "the way to convey (-O0) to the optimizer." So, I view '-O0' as a request from the programmer to turn off the optimizer to the extent possible and give them as naive, minimally transformed representation of th ecode as possible. And based on that, I view optnone as a tool to implement precisely these semantics at a fine granularity and with survivability across bitcode roundtrip. It just isn't the *only* tool, and sometimes we can use an easier (and cheaper to Mehdi's compile time point) tool. I think the text for spec'ing optnone in the LLVM langref needs to be updated to reflect this though. Currently it says: > This function attribute indicates that most optimization passes will skip > this function, with the exception of interprocedural optimization passes. This is demonstrably false: % ag OptimizeNone lib/Transforms/IPO lib/Transforms/IPO/ForceFunctionAttrs.cpp 47: .Case("optnone", Attribute::OptimizeNone) lib/Transforms/IPO/Inliner.cpp 813:if (F.hasFnAttribute(Attribute::OptimizeNone)) lib/Transforms/IPO/InferFunctionAttrs.cpp 30:if (F.isDeclaration() && !F.hasFnAttribute((Attribute::OptimizeNone))) lib/Transforms/IPO/FunctionAttrs.cpp 1056:if (F.hasFnAttribute(Attribute::OptimizeNone)) { 1137:if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) { I'll send a patch. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#641632, @probinson wrote: > In https://reviews.llvm.org/D28404#641606, @mehdi_amini wrote: > > > If we want to support `-O0 -flto` and `optnone` it the way to convey this > > to the optimizer, I don't see the alternative. > > > optsize != -Os (according to Chandler) > minsize != -Oz (according to Chandler) > optnone != -O0 (according to both me and Chandler) Of course, but that's just an implementation limitation, mostly for historical reason I believe, not by design. That does not have to be set in stone and I'm giving you the direction with respect to LTO in particular here: these attributes should be able to behave the same way as the corresponding '-O' command line. > optnone is not "the way to convey (-O0) to the optimizer." Please get that > misunderstanding out of your head. Clang handles -O0 by creating a short, > minimalist pipeline, and running everything through it. Clang handles -O2 by > creating a fuller optimization pipeline, and functions with 'optnone' skip > many of the passes in the pipeline. Don't get me wrong: I believe I have a very good understanding how the optimizer pipeline is setup and how the passes operates with respect to the attributes. And it is because I understand the deficiencies (and how it is an issue with LTO) that I'm aligning all of this toward a consistent/coherent expected result for the users. > These are architecturally different processes, you are not going to be able > to make 'optnone' behave exactly like -O0 without major redesign of how the > pipelines work. I'd disagree about your estimation of "major". It's not gonna be tomorrow, sure, but it does not have to be. The most difficult part will be the inter procedural ones, but there are not that many. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#641606, @mehdi_amini wrote: > If we want to support `-O0 -flto` and `optnone` it the way to convey this to > the optimizer, I don't see the alternative. optsize != -Os (according to Chandler) minsize != -Oz (according to Chandler) optnone != -O0 (according to both me and Chandler) optnone is not "the way to convey (-O0) to the optimizer." Please get that misunderstanding out of your head. Clang handles -O0 by creating a short, minimalist pipeline, and running everything through it. Clang handles -O2 by creating a fuller optimization pipeline, and functions with 'optnone' skip many of the passes in the pipeline. These are architecturally different processes, you are not going to be able to make 'optnone' behave exactly like -O0 without major redesign of how the pipelines work. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#641597, @probinson wrote: > In https://reviews.llvm.org/D28404#641557, @mehdi_amini wrote: > > > As I stand right now, there hasn't been any correction. > > I still consider the fact that `optnone` wouldn't produce the "same" > > result (modulo corner cases around `merging global variables` for instance) > > as O0 a bug that need to be fixed. > > > Why? Why not? What's the alternative? If we want to support `-O0 -flto` and `optnone` it the way to convey this to the optimizer, I don't see the alternative. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#641557, @mehdi_amini wrote: > As I stand right now, there hasn't been any correction. > I still consider the fact that `optnone` wouldn't produce the "same" result > (modulo corner cases around `merging global variables` for instance) as O0 a > bug that need to be fixed. Why? That's not the purpose of optnone. You've already admitted there are some differences. Why are other differences important? https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#641538, @probinson wrote: > > - optnone isn't *really* no optimizations: clearly this is true, but then > > neither is -O0. We run the always inliner, a couple of other passes, and we > > run several parts of the code generators optimizer. I understand why > > optnone deficiencies (ie, too many optimizations) might be frustrating, but > > having *more users* seems likely to make this *better*. > > We have picked all the low-hanging fruit there, and probably some > medium-hanging fruit. Mehdi did have the misunderstanding that optnone == > -O0 and that I think was worth correcting. As I stand right now, there hasn't been any correction. I still consider the fact that `optnone` wouldn't produce the "same" result (modulo corner cases around `merging global variables` for instance) as O0 a bug that need to be fixed. (Disabling passes for compile time at O0 stays I compile time improvement, I never suggested to stop doing this...) https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#641078, @chandlerc wrote: > For me, the arguments you're raising against -O0 and -flto don't hold up on > closer inspection: > > - O0 != optnone: correct. But this is only visible in LTO. And in LTO, Os != > optsize, and Oz != minsize. But we use optsize and minsize to communicate > between the compilation and the LTO step to the best of our ability the > intent of the programmer. It appears we can use optnone exactly the same way > here. If the design decision is that relevant optimization controls are propagated into bitcode as function attributes, I grumble but concede it will do something similar to what was requested. It does bother me that we keep finding things that LTO needs to know but which it does not know because it runs in a separate phase of the workflow. I hope it is not a serious problem to ask "is there a more sensible way to fix this?" Maybe I'm not so good at expressing that so it comes out as a question rather than an objection, but that's basically what it is. This design decision leaves -O1/-Og needing yet another attribute, when we get around to that, but I suppose Og would not have the interaction-with-other-attributes problems that optnone has. > - optnone isn't *really* no optimizations: clearly this is true, but then > neither is -O0. We run the always inliner, a couple of other passes, and we > run several parts of the code generators optimizer. I understand why optnone > deficiencies (ie, too many optimizations) might be frustrating, but having > *more users* seems likely to make this *better*. We have picked all the low-hanging fruit there, and probably some medium-hanging fruit. Mehdi did have the misunderstanding that optnone == -O0 and that I think was worth correcting. > - There is no use case for -O0 + -flto: The email thread has an exchange between Duncan and me, where I accept the use case. > But all of this seems like an attempt to argue "you are wrong to have your > use case". I personally find that an unproductive line of discussion. Not saying it was *wrong* just the description did not convey adequate justification. Listing a few project types does not constitute a use case. We did get to one, eventually, and it even involved differences in optimization levels. > For example, you might ask: could we find some other way to solve the problem > you are trying to solve here? There is another way to make use of the attribute, which I think will be more robust: Have Sema pretend the pragma is in effect at all times, at -O0. Then all the existing conflict detection/resolution logic Just Works, and there's no need to spend 4 lines of code hoping to replicate the correct conditions in CodeGenModule. Because Sema does not have a handle on CodeGenOptions and therefore does not a-priori know the optimization level, probably the right thing to do is move the flag to LangOpts and set it under the correct conditions in CompilerInvocation. It wouldn't be the first codegen-like option in LangOpts. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
chandlerc added a comment. In https://reviews.llvm.org/D28404#640862, @probinson wrote: > In https://reviews.llvm.org/D28404#640682, @mehdi_amini wrote: > > > > I'm now thinking along the lines of a `-foptimize-off` flag (bikesheds > > > welcome) which would set the default for the pragma to 'off'. How is > > > that different than what you wanted for `-O0`? It is defined in terms of > > > an existing pragma, which is WAY easier to explain and WAY easier to > > > implement. And, it still lets us say that `-c -O0 -flto` is a mistake, > > > if that seems like a useful thing to say. > > > > Well -O0 being actually "disable optimization", I found "way easier" to > > handle everything the same way (pragma, command line, etc.). I kind of find > > it confusing for the user to differentiate `-O0` from `-foptimize=off`. > > What is supposed to change between the two? > > > There is a pedantic difference, rooted in the still-true factoid that O0 != > optnone. > If we redefine LTO as "Link Time Operation" (rather than Optimization; see > my reply to Duncan) then `-O0 -flto` is no longer an oxymoron, but using the > attribute to imply the optimization level is still not good fidelity to what > the user asked for. I have to say, I don't understand the confusion or problem here... For me, the arguments you're raising against -O0 and -flto don't hold up on closer inspection: - O0 != optnone: correct. But this is only visible in LTO. And in LTO, Os != optsize, and Oz != minsize. But we use optsize and minsize to communicate between the compilation and the LTO step to the best of our ability the intent of the programmer. It appears we can use optnone exactly the same way here. - optnone isn't *really* no optimizations: clearly this is true, but then neither is -O0. We run the always inliner, a couple of other passes, and we run several parts of the code generators optimizer. I understand why optnone deficiencies (ie, too many optimizations) might be frustrating, but having *more users* seems likely to make this *better*. - There is no use case for -O0 + -flto: I really don't understand this. CFI and other whole program analysis or semantic transformations (*not* optimizations) require LTO but not any particular pipeline. And I *really* want the ability to bisect files going into an LTO build to chase miscompiles. There are large systems built to manipulate flags that are much more efficient and accessible than modifying source code. It seems an entirely reasonable (and quite low cost) feature. The fact that the LTO acronym stands for Link Time Optimization seems like a relatively unimportant thing. It is just an acronym and a name. We shouldn't let it preclude interesting use cases. But all of this seems like an attempt to argue "you are wrong to have your use case". I personally find that an unproductive line of discussion. I would suggest instead we look at this differently: For example, you might ask: could we find some other way to solve the problem you are trying to solve here? Suggesting an alternative approach would seem constructive. So far, all we've got is modify source code, but I think that there is a clear explanation of why that doesn't address the particular use case. You might also ask: is supporting this feature a reasonable maintenance burden for Clang to address the use case? That seems like a productive discussion. For example, I *am* concerned about the increasing attribute noise at -O0. I don't think it is something to be dismissed. However, given the options we have today, it seems like the most effective way to address this use case and I don't have any better ideas to solve the problems Mehdi is solving here. But I'm also not one of the most active maintainers writing patches, fixing bugs, and improving the IRGen layer. So ultimately, I defer on the maintenance issue to those maintainers. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
RE: [PATCH] D28404: IRGen: Add optnone attribute on function during O0
(Re-add cfe-commits; otherwise same) > -Original Message- > From: cfe-commits [mailto:cfe-commits-boun...@lists.llvm.org] On Behalf Of > Duncan P. N. Exon Smith via cfe-commits > Sent: Monday, January 09, 2017 4:10 PM > To: reviews+d28404+public+53e0f4655ef79...@reviews.llvm.org > Cc: nhaeh...@gmail.com; wei.di...@amd.com; jholewin...@nvidia.com; Richard > Smith; cfe-commits; Peter Collingbourne > Subject: Re: [PATCH] D28404: IRGen: Add optnone attribute on function > during O0 > > This seems like a massive rehash of a discussion Peter Collingbourne and I > had about passing -O0 to the linker for -flto=full. I had previously > thought of LTO as "link time optimization", but in practice it's useful > for (and required for correctness of some) non-optimization IR passes. > > In other words, the basic question seems to be: "Should LTO support non- > optimization use cases?" I tend (now) to think it should -- having > "optimization" in its name is an historical artifact -- because adding > another way to run IR passes at link-time seems redundant. Whereas, Paul, > it seems like you disagree? I am persuaded that there are non-optimization-based uses for clumps of bitcode modules linked together. (We could redefine the TLA if we like; LTO = Link Time Operation?) I am equally convinced that we have no good story for propagating a variety of optimization- and codegen-related options to the top-level LTO processor. This is most especially true when different CUs might reasonably have different options. -O0 is the example at hand, but this problem seems to keep coming up and we keep hacking in ways to get the thing we think we need in the moment. > > (Also, this discussion seems higher level than just the patch at hand... > maybe llvm-dev would be more appropriate?) I'd be fine with that. --paulr > > > On 2017-Jan-09, at 16:03, Paul Robinson via Phabricator > <revi...@reviews.llvm.org> wrote: > > > > probinson added a comment. > > > > In https://reviews.llvm.org/D28404#640588, @mehdi_amini wrote: > > > >> Actually, as mentioned before, I could be fine with making `O0` > incompatible with LTO, however security features like CFI (or other sort > of whole-program analyses/instrumentations) requires LTO. > > > > > > Well, "requires LTO" is overstating the case, AFAICT from the link you > gave me. Doesn't depend on //optimization// at all. It depends on some > interprocedural analyses given some particular scope/visibility boundary, > which it is convenient to define as a set of linked bitcode modules, that > by some happy chance is the same set of linked bitcode modules that LTO > will operate on. > > > > If it's important to support combining a bitcode version of my- > application with your-bitcode-library for this CFI or whatever, and you > also want to let me have my-application be unoptimized while your-bitcode- > library gets optimized, NOW we have a use-case. (Maybe that's what you > had in mind earlier, but for some reason I wasn't able to extract that out > of any prior comments. No matter.) > > > > I'm now thinking along the lines of a `-foptimize-off` flag (bikesheds > welcome) which would set the default for the pragma to 'off'. How is that > different than what you wanted for `-O0`? It is defined in terms of an > existing pragma, which is WAY easier to explain and WAY easier to > implement. And, it still lets us say that `-c -O0 -flto` is a mistake, if > that seems like a useful thing to say. > > > > Does that seem reasonable? Fit your understanding of the needs? > > > > > > https://reviews.llvm.org/D28404 > > > > > > > > ___ > cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640682, @mehdi_amini wrote: > > I'm now thinking along the lines of a `-foptimize-off` flag (bikesheds > > welcome) which would set the default for the pragma to 'off'. How is that > > different than what you wanted for `-O0`? It is defined in terms of an > > existing pragma, which is WAY easier to explain and WAY easier to > > implement. And, it still lets us say that `-c -O0 -flto` is a mistake, if > > that seems like a useful thing to say. > > Well -O0 being actually "disable optimization", I found "way easier" to > handle everything the same way (pragma, command line, etc.). I kind of find > it confusing for the user to differentiate `-O0` from `-foptimize=off`. What > is supposed to change between the two? There is a pedantic difference, rooted in the still-true factoid that O0 != optnone. If we redefine LTO as "Link Time Operation" (rather than Optimization; see my reply to Duncan) then `-O0 -flto` is no longer an oxymoron, but using the attribute to imply the optimization level is still not good fidelity to what the user asked for. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D28404: IRGen: Add optnone attribute on function during O0
This seems like a massive rehash of a discussion Peter Collingbourne and I had about passing -O0 to the linker for -flto=full. I had previously thought of LTO as "link time optimization", but in practice it's useful for (and required for correctness of some) non-optimization IR passes. In other words, the basic question seems to be: "Should LTO support non-optimization use cases?" I tend (now) to think it should -- having "optimization" in its name is an historical artifact -- because adding another way to run IR passes at link-time seems redundant. Whereas, Paul, it seems like you disagree? (Also, this discussion seems higher level than just the patch at hand... maybe llvm-dev would be more appropriate?) > On 2017-Jan-09, at 16:03, Paul Robinson via Phabricator >wrote: > > probinson added a comment. > > In https://reviews.llvm.org/D28404#640588, @mehdi_amini wrote: > >> Actually, as mentioned before, I could be fine with making `O0` incompatible >> with LTO, however security features like CFI (or other sort of whole-program >> analyses/instrumentations) requires LTO. > > > Well, "requires LTO" is overstating the case, AFAICT from the link you gave > me. Doesn't depend on //optimization// at all. It depends on some > interprocedural analyses given some particular scope/visibility boundary, > which it is convenient to define as a set of linked bitcode modules, that by > some happy chance is the same set of linked bitcode modules that LTO will > operate on. > > If it's important to support combining a bitcode version of my-application > with your-bitcode-library for this CFI or whatever, and you also want to let > me have my-application be unoptimized while your-bitcode-library gets > optimized, NOW we have a use-case. (Maybe that's what you had in mind > earlier, but for some reason I wasn't able to extract that out of any prior > comments. No matter.) > > I'm now thinking along the lines of a `-foptimize-off` flag (bikesheds > welcome) which would set the default for the pragma to 'off'. How is that > different than what you wanted for `-O0`? It is defined in terms of an > existing pragma, which is WAY easier to explain and WAY easier to implement. > And, it still lets us say that `-c -O0 -flto` is a mistake, if that seems > like a useful thing to say. > > Does that seem reasonable? Fit your understanding of the needs? > > > https://reviews.llvm.org/D28404 > > > ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. > I'm now thinking along the lines of a `-foptimize-off` flag (bikesheds > welcome) which would set the default for the pragma to 'off'. How is that > different than what you wanted for `-O0`? It is defined in terms of an > existing pragma, which is WAY easier to explain and WAY easier to implement. > And, it still lets us say that `-c -O0 -flto` is a mistake, if that seems > like a useful thing to say. Well -O0 being actually "disable optimization", I found "way easier" to handle everything the same way (pragma, command line, etc.). I kind of find it confusing for the user to differentiate `-O0` from `-foptimize=off`. What is supposed to change between the two? https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640588, @mehdi_amini wrote: > Actually, as mentioned before, I could be fine with making `O0` incompatible > with LTO, however security features like CFI (or other sort of whole-program > analyses/instrumentations) requires LTO. Well, "requires LTO" is overstating the case, AFAICT from the link you gave me. Doesn't depend on //optimization// at all. It depends on some interprocedural analyses given some particular scope/visibility boundary, which it is convenient to define as a set of linked bitcode modules, that by some happy chance is the same set of linked bitcode modules that LTO will operate on. If it's important to support combining a bitcode version of my-application with your-bitcode-library for this CFI or whatever, and you also want to let me have my-application be unoptimized while your-bitcode-library gets optimized, NOW we have a use-case. (Maybe that's what you had in mind earlier, but for some reason I wasn't able to extract that out of any prior comments. No matter.) I'm now thinking along the lines of a `-foptimize-off` flag (bikesheds welcome) which would set the default for the pragma to 'off'. How is that different than what you wanted for `-O0`? It is defined in terms of an existing pragma, which is WAY easier to explain and WAY easier to implement. And, it still lets us say that `-c -O0 -flto` is a mistake, if that seems like a useful thing to say. Does that seem reasonable? Fit your understanding of the needs? https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. Actually, as mentioned before, I could be fine with making `O0` incompatible with LTO, however security features like CFI (or other sort of whole-program analyses/instrumentations) requires LTO. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640362, @probinson wrote: > In https://reviews.llvm.org/D28404#640314, @mehdi_amini wrote: > > > I don't follow: IMO if I generate a module with optnone and pipe it to `opt > > -O3` I expect no function IR to be touched. If it is not the case it is a > > bug. > > > Your opinion and expectation are not supported by the IR spec. Optnone skips > "most" optimization passes. It is not practical (or was not, at the time) to > make the -O3 pipeline behave exactly the same as the -O0 pipeline, and also > not actually necessary to support the purpose for which 'optnone' was > invented. > > If you have a goal of making 'optnone' functions use the actual -O0 pipeline, > while non-optnone functions use the optimizing pipeline, more power to you > and you will need to take up that particular design challenge with Chandler > first. Oh, maybe you are thinking of eliminating the -O0 pipeline? Because if -O0 implies optnone then it's kinda-sorta the same thing as the optimization pipeline operating on nothing but optnone functions? I'd think that would make -O0 compilations slow down, which would not be a feature. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. Basically, I don't see why having clang always emit a real .o at -O0 would be a problem. I haven't gotten through the other-CFI documentation yet though. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640314, @mehdi_amini wrote: > In https://reviews.llvm.org/D28404#640284, @probinson wrote: > > > In https://reviews.llvm.org/D28404#640178, @mehdi_amini wrote: > > > > > In https://reviews.llvm.org/D28404#640170, @probinson wrote: > > > > > > > In my experience, modifying source is by far simpler than hacking a > > > > build system to make a special case for compiler options for one module > > > > in an application. (If you have a way to build Clang with everything > > > > done LTO except one module built with -O0, on Linux with ninja, I would > > > > be very curious to hear how you do that.) > > > > > > > > > Static library, separated projects, etc. > > > We have tons of users... > > > > > > Still waiting. > > > Waiting for what? > We have use-cases, I gave you a few (vendor static libraries are one). > Again, if you think it is wrong to support O0 and LTO, then please elaborate. Your original use-case described debugging a module in an application. You claimed it was simpler to change the build options for a module than change the source, which I am still waiting to hear how/why that is simpler. Your subsequent use cases are about entire sub-projects, which is entirely different and orthogonal to where you started. Please elaborate on the original use case. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640314, @mehdi_amini wrote: > In https://reviews.llvm.org/D28404#640284, @probinson wrote: > > > Upfront, it seemed peculiar to handle only one optimization level. After > > more thought, the whole idea of mixing -O0 and LTO seems wrong. Sorry, > > should have signaled that I had changed my mind about it. > > > You just haven't articulated 1) why it is wrong and 2) what should we do > about it. "Optimize without optimizing" really? Does not sound confused to you? Persuade me why it makes sense. If it doesn't make sense, then yes making the `-O0 -flto` combination an error would be the right path. Unless you are taking the position that `-flto` doesn't mean "use LTO" and instead means something else, like "emit bitcode" in which case you should be advocating to change the name of the option to say what it means. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640314, @mehdi_amini wrote: > You just wrote above that " mixing -O0 and LTO " is wrong, *if* I were to > agree with you at some point, then I'd make it a hard error. Yes, I was not clear that I meant that `-O0 -flto` on the same clang command line just seems nonsensical. "Optimize my program without optimizing it" forsooth. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640314, @mehdi_amini wrote: > I don't follow: IMO if I generate a module with optnone and pipe it to `opt > -O3` I expect no function IR to be touched. If it is not the case it is a bug. Your opinion and expectation are not supported by the IR spec. Optnone skips "most" optimization passes. It is not practical (or was not, at the time) to make the -O3 pipeline behave exactly the same as the -O0 pipeline, and also not actually necessary to support the purpose for which 'optnone' was invented. If you have a goal of making 'optnone' functions use the actual -O0 pipeline, while non-optnone functions use the optimizing pipeline, more power to you and you will need to take up that particular design challenge with Chandler first. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#640297, @probinson wrote: > Sorry, you lost me. CFI is part of DWARF and we do DWARF perfectly well > without LTO (and at O0). This CFI: http://clang.llvm.org/docs/ControlFlowIntegrity.html https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#640284, @probinson wrote: > In https://reviews.llvm.org/D28404#640178, @mehdi_amini wrote: > > > In https://reviews.llvm.org/D28404#640170, @probinson wrote: > > > > > In https://reviews.llvm.org/D28404#640090, @mehdi_amini wrote: > > > > > > > In https://reviews.llvm.org/D28404#640046, @probinson wrote: > > > > > > > > > "I don't care" doesn't seem like much of a principle. > > > > > > > > > > > > Long version is: "There is no use-case, no users, so I don't have much > > > > motivation to push it forward for the only sake of completeness". Does > > > > it sound enough of a principle like that? > > > > > > > > > No. You still need to have adequate justification for your use case, > > > which I think you do not. > > > > > > I don't follow your logic. > > IIUC, you asked about "why not supporting `O1/O2/O3`" ; how is *not > > supporting* these because their not useful / don't have use-case related to > > "supporting `O0` is useful"? > > > Upfront, it seemed peculiar to handle only one optimization level. After > more thought, the whole idea of mixing -O0 and LTO seems wrong. Sorry, > should have signaled that I had changed my mind about it. You just haven't articulated 1) why it is wrong and 2) what should we do about it. > > > Optnone does not equal -O0. It is a debugging aid for the programmer, > because debugging optimized code sucks. If you have an LTO-built > application and want to de-optimize parts of it to aid with debugging, > then you can use the pragma, as originally intended. Having to modifying the source isn't friendly. Not being able to honor -O0 during LTO is not user-friendly. >>> >>> IMO, '-O0' and '-flto' are conflicting options and therefore not deserving >>> of special support. >> >> You're advocating for *rejecting* O0 built module at link-time? We'd still >> need to detect this though. Status-quo isn't acceptable. >> Also, that's not practicable: what if I have an LTO static library for >> which I don't have the source, now if I build my own file with -O0 -flto I >> can't link anymore. > > No, I'm saying they are conflicting options on the same Clang command line. > As long as your linker can handle foo.o and bar.bc on the same command line, > not a problem. (If your linker can't handle that, fix the linker first.) You just wrote above that " mixing -O0 and LTO " is wrong, *if* I were to agree with you at some point, then I'd make it a hard error. >>> In my experience, modifying source is by far simpler than hacking a build >>> system to make a special case for compiler options for one module in an >>> application. (If you have a way to build Clang with everything done LTO >>> except one module built with -O0, on Linux with ninja, I would be very >>> curious to hear how you do that.) >> >> Static library, separated projects, etc. >> We have tons of users... > > Still waiting. Waiting for what? We have use-cases, I gave you a few (vendor static libraries are one). Again, if you think it is wrong to support O0 and LTO, then please elaborate. > I don't think `-c -O0` should get this not-entirely-O0-like behavior. What is "not-entirely"? And why do you think that? >>> >>> "Not entirely" means that running the -O0 pipeline, and running an >>> optimization pipeline but asking some subset of passes to turn themselves >>> off, does not get you the same result. And I think that because I'm the >>> one who put 'optnone' upstream in the first place. The case that >>> particularly sticks in my memory is the register allocator, but I believe >>> there are passes at every stage that do not turn themselves off for optnone. >> >> That's orthogonal: you're saying we are not handling it correctly yet, I'm >> just moving toward *fixing* all these. > > It's not orthogonal; that's exactly how 'optnone' behaves today. If you have > proposed a redesign of how to mix optnone and non-optnone functions in the > same compilation unit, in some way other than what's done today, I am not > aware of it; can you point to your proposal? I don't follow: IMO if I generate a module with optnone and pipe it to `opt -O3` I expect no function IR to be touched. If it is not the case it is a bug. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640182, @mehdi_amini wrote: > In https://reviews.llvm.org/D28404#640178, @mehdi_amini wrote: > > > Also, that's not practicable: what if I have an LTO static library for > > which I don't have the source, now if I build my own file with -O0 -flto I > > can't link anymore. > > > Also: LTO is required for some features likes CFI. There are users who wants > CFI+O0 during development (possibly for debugging a subcomponent of the app). Sorry, you lost me. CFI is part of DWARF and we do DWARF perfectly well without LTO (and at O0). https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640178, @mehdi_amini wrote: > In https://reviews.llvm.org/D28404#640170, @probinson wrote: > > > In https://reviews.llvm.org/D28404#640090, @mehdi_amini wrote: > > > > > In https://reviews.llvm.org/D28404#640046, @probinson wrote: > > > > > > > "I don't care" doesn't seem like much of a principle. > > > > > > > > > Long version is: "There is no use-case, no users, so I don't have much > > > motivation to push it forward for the only sake of completeness". Does it > > > sound enough of a principle like that? > > > > > > No. You still need to have adequate justification for your use case, which > > I think you do not. > > > I don't follow your logic. > IIUC, you asked about "why not supporting `O1/O2/O3`" ; how is *not > supporting* these because their not useful / don't have use-case related to > "supporting `O0` is useful"? Upfront, it seemed peculiar to handle only one optimization level. After more thought, the whole idea of mixing -O0 and LTO seems wrong. Sorry, should have signaled that I had changed my mind about it. Optnone does not equal -O0. It is a debugging aid for the programmer, because debugging optimized code sucks. If you have an LTO-built application and want to de-optimize parts of it to aid with debugging, then you can use the pragma, as originally intended. >>> >>> Having to modifying the source isn't friendly. Not being able to honor -O0 >>> during LTO is not user-friendly. >> >> IMO, '-O0' and '-flto' are conflicting options and therefore not deserving >> of special support. > > You're advocating for *rejecting* O0 built module at link-time? We'd still > need to detect this though. Status-quo isn't acceptable. > Also, that's not practicable: what if I have an LTO static library for which > I don't have the source, now if I build my own file with -O0 -flto I can't > link anymore. No, I'm saying they are conflicting options on the same Clang command line. As long as your linker can handle foo.o and bar.bc on the same command line, not a problem. (If your linker can't handle that, fix the linker first.) >> In my experience, modifying source is by far simpler than hacking a build >> system to make a special case for compiler options for one module in an >> application. (If you have a way to build Clang with everything done LTO >> except one module built with -O0, on Linux with ninja, I would be very >> curious to hear how you do that.) > > Static library, separated projects, etc. > We have tons of users... Still waiting. Your up-front use case was about de-optimizing a module to assist debugging it within an LTO-built application, not building entire projects one way versus another. If that is not actually your use case, you need to start over with the correct description. I don't think `-c -O0` should get this not-entirely-O0-like behavior. >>> >>> What is "not-entirely"? And why do you think that? >> >> "Not entirely" means that running the -O0 pipeline, and running an >> optimization pipeline but asking some subset of passes to turn themselves >> off, does not get you the same result. And I think that because I'm the one >> who put 'optnone' upstream in the first place. The case that particularly >> sticks in my memory is the register allocator, but I believe there are >> passes at every stage that do not turn themselves off for optnone. > > That's orthogonal: you're saying we are not handling it correctly yet, I'm > just moving toward *fixing* all these. It's not orthogonal; that's exactly how 'optnone' behaves today. If you have proposed a redesign of how to mix optnone and non-optnone functions in the same compilation unit, in some way other than what's done today, I am not aware of it; can you point to your proposal? https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#640178, @mehdi_amini wrote: > Also, that's not practicable: what if I have an LTO static library for which > I don't have the source, now if I build my own file with -O0 -flto I can't > link anymore. Also: LTO is required for some features likes CFI. There are users who wants CFI+O0 during development (possibly for debugging a subcomponent of the app). https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#640170, @probinson wrote: > In https://reviews.llvm.org/D28404#640090, @mehdi_amini wrote: > > > In https://reviews.llvm.org/D28404#640046, @probinson wrote: > > > > > "I don't care" doesn't seem like much of a principle. > > > > > > Long version is: "There is no use-case, no users, so I don't have much > > motivation to push it forward for the only sake of completeness". Does it > > sound enough of a principle like that? > > > No. You still need to have adequate justification for your use case, which I > think you do not. I don't follow your logic. IIUC, you asked about "why not supporting `O1/O2/O3`" ; how is *not supporting* these because their not useful / don't have use-case related to "supporting `O0` is useful"? > > >>> Optnone does not equal -O0. It is a debugging aid for the programmer, >>> because debugging optimized code sucks. If you have an LTO-built >>> application and want to de-optimize parts of it to aid with debugging, then >>> you can use the pragma, as originally intended. >> >> Having to modifying the source isn't friendly. Not being able to honor -O0 >> during LTO is not user-friendly. > > IMO, '-O0' and '-flto' are conflicting options and therefore not deserving of > special support. You're advocating for *rejecting* O0 built module at link-time? We'd still need to detect this though. Status-quo isn't acceptable. Also, that's not practicable: what if I have an LTO static library for which I don't have the source, now if I build my own file with -O0 -flto I can't link anymore. > In my experience, modifying source is by far simpler than hacking a build > system to make a special case for compiler options for one module in an > application. (If you have a way to build Clang with everything done LTO > except one module built with -O0, on Linux with ninja, I would be very > curious to hear how you do that.) Static library, separated projects, etc. We have tons of users... >>> I don't think `-c -O0` should get this not-entirely-O0-like behavior. >> >> What is "not-entirely"? And why do you think that? > > "Not entirely" means that running the -O0 pipeline, and running an > optimization pipeline but asking some subset of passes to turn themselves > off, does not get you the same result. And I think that because I'm the one > who put 'optnone' upstream in the first place. The case that particularly > sticks in my memory is the register allocator, but I believe there are passes > at every stage that do not turn themselves off for optnone. That's orthogonal: you're saying we are not handling it correctly yet, I'm just moving toward *fixing* all these. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640170, @probinson wrote: > In my experience, modifying source Note that the source modification consists of adding `#pragma clang optimize off` to the top of the file. It is not a complicated thing. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#640090, @mehdi_amini wrote: > In https://reviews.llvm.org/D28404#640046, @probinson wrote: > > > "I don't care" doesn't seem like much of a principle. > > > Long version is: "There is no use-case, no users, so I don't have much > motivation to push it forward for the only sake of completeness". Does it > sound enough of a principle like that? No. You still need to have adequate justification for your use case, which I think you do not. >> Optnone does not equal -O0. It is a debugging aid for the programmer, >> because debugging optimized code sucks. If you have an LTO-built >> application and want to de-optimize parts of it to aid with debugging, then >> you can use the pragma, as originally intended. > > Having to modifying the source isn't friendly. Not being able to honor -O0 > during LTO is not user-friendly. IMO, '-O0' and '-flto' are conflicting options and therefore not deserving of special support. In my experience, modifying source is by far simpler than hacking a build system to make a special case for compiler options for one module in an application. (If you have a way to build Clang with everything done LTO except one module built with -O0, on Linux with ninja, I would be very curious to hear how you do that.) But if your build system makes that easy, you can just as easily remove `-flto` as add `-O0` and thus get the result you want without trying to pass conflicting options to the compiler. Or spending time implementing this patch. >> I don't think `-c -O0` should get this not-entirely-O0-like behavior. > > What is "not-entirely"? And why do you think that? "Not entirely" means that running the -O0 pipeline, and running an optimization pipeline but asking some subset of passes to turn themselves off, does not get you the same result. And I think that because I'm the one who put 'optnone' upstream in the first place. The case that particularly sticks in my memory is the register allocator, but I believe there are passes at every stage that do not turn themselves off for optnone. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#640046, @probinson wrote: > In https://reviews.llvm.org/D28404#639887, @mehdi_amini wrote: > > > In https://reviews.llvm.org/D28404#639874, @probinson wrote: > > > > > Over the weekend I had a thought: Why is -O0 so special here? That is, > > > after going to all this trouble to propagate -O0 to LTO, how does this > > > generalize to propagating -O1 or any other specific -O option? (Maybe > > > this question would be better dealt with on the dev list...) > > > > > > O0 is "special" like Os and Oz because we have an attribute for it and > > passes "know" how to handle this attribute. > > I guess no-one cares enough about > > https://reviews.llvm.org/owners/package/1//https://reviews.llvm.org/owners/package/2//O3 > > to find a solution for these (in the context of LTO, I don't really care > > about > > https://reviews.llvm.org/owners/package/1//https://reviews.llvm.org/owners/package/2/). > > It is likely that Og would need a special treatment at some point, maybe > > with a new attribute as well, to inhibit optimization that can't preserve > > debug info properly. > > > "I don't care" doesn't seem like much of a principle. Long version is: "There is no use-case, no users, so I don't have much motivation to push it forward for the only sake of completeness". Does it sound enough of a principle like that? > Optnone does not equal -O0. It is a debugging aid for the programmer, > because debugging optimized code sucks. If you have an LTO-built application > and want to de-optimize parts of it to aid with debugging, then you can use > the pragma, as originally intended. Having to modifying the source isn't friendly. Not being able to honor -O0 during LTO is not user-friendly. > I don't think `-c -O0` should get this not-entirely-O0-like behavior. What is "not-entirely"? And why do you think that? https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#639887, @mehdi_amini wrote: > In https://reviews.llvm.org/D28404#639874, @probinson wrote: > > > Over the weekend I had a thought: Why is -O0 so special here? That is, > > after going to all this trouble to propagate -O0 to LTO, how does this > > generalize to propagating -O1 or any other specific -O option? (Maybe this > > question would be better dealt with on the dev list...) > > > O0 is "special" like Os and Oz because we have an attribute for it and passes > "know" how to handle this attribute. > I guess no-one cares enough about > https://reviews.llvm.org/owners/package/1//https://reviews.llvm.org/owners/package/2//O3 > to find a solution for these (in the context of LTO, I don't really care > about > https://reviews.llvm.org/owners/package/1//https://reviews.llvm.org/owners/package/2/). > It is likely that Og would need a special treatment at some point, maybe > with a new attribute as well, to inhibit optimization that can't preserve > debug info properly. "I don't care" doesn't seem like much of a principle. Optnone does not equal -O0. It is a debugging aid for the programmer, because debugging optimized code sucks. If you have an LTO-built application and want to de-optimize parts of it to aid with debugging, then you can use the pragma, as originally intended. I don't think `-c -O0 -flto` should get this not-entirely-O0-like behavior. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#639874, @probinson wrote: > Over the weekend I had a thought: Why is -O0 so special here? That is, > after going to all this trouble to propagate -O0 to LTO, how does this > generalize to propagating -O1 or any other specific -O option? (Maybe this > question would be better dealt with on the dev list...) O0 is "special" like Os and Oz because we have an attribute for it and passes "know" how to handle this attribute. I guess no-one cares enough about https://reviews.llvm.org/owners/package/1//https://reviews.llvm.org/owners/package/2//O3 to find a solution for these (in the context of LTO, I don't really care about https://reviews.llvm.org/owners/package/1//https://reviews.llvm.org/owners/package/2/). It is likely that Og would need a special treatment at some point, maybe with a new attribute as well, to inhibit optimization that can't preserve debug info properly. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. Over the weekend I had a thought: Why is -O0 so special here? That is, after going to all this trouble to propagate -O0 to LTO, how does this generalize to propagating -O1 or any other specific -O option? (Maybe this question would be better dealt with on the dev list...) https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added inline comments. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:910-912 // OptimizeNone wins over OptimizeForSize and MinSize. F->removeFnAttr(llvm::Attribute::OptimizeForSize); F->removeFnAttr(llvm::Attribute::MinSize); chandlerc wrote: > mehdi_amini wrote: > > probinson wrote: > > > mehdi_amini wrote: > > > > chandlerc wrote: > > > > > Is this still at all correct? Why? it seems pretty confusing > > > > > especially in conjunction with the code below. > > > > > > > > > > > > > > > I think this may force you to either: > > > > > a) stop early-marking of -Os and -Oz flags with these attributes > > > > > (early: prior to calling this routine) and handling all of the -O > > > > > flag synthesized attributes here, or > > > > > b) set optnone for -O0 wher ewe set optsize for -Os and friends, and > > > > > then remove it where necessary here. > > > > > > > > > > I don't have any strong opinion about a vs. b. > > > > I believe it is still correct: during Os/Oz we reach this point and > > > > figure that there is `__attribute__((optnone))` in the *source* (not > > > > `-O0`), we remove the attributes, nothing changes. Did I miss something? > > > > > > > Hmmm the Os/Oz attributes are added in CGCall.cpp, and are guarded with a > > > check on the presence of the Optnone source attribute, so if the Optnone > > > source attribute is present we should never see these. And Os/Oz set > > > OptimizationLevel to 2, which is not zero, so we won't come through here > > > for ShouldAddOptNone reasons either. > > > Therefore these 'remove' calls should be no-ops and could be removed. > > > (For paranoia you could turn them into asserts, and do some experimenting > > > to see whether I'm confused about how this all fits together.) > > The verifier is already complaining if we get this wrong, and indeed it > > complains if I'm removing these. > > See clang/test/CodeGen/attr-func-def.c: > > > > ``` > > > > int foo1(int); > > > > int foo2(int a) { > > return foo1(a + 2); > > } > > > > __attribute__((optnone)) > > int foo1(int a) { > > return a + 1; > > } > > ``` > > > > Here we have the attributed optnone on the definition but not the > > declaration, and the check you're mentioning in CGCalls is only applying to > > the declaration. > This is all still incredibly confusing code. > > I think what would make me happy with this is to have a separate section for > each mutually exclusive group of LLVM attributes added to the function. so: > > // Add the relevant optimization level to the LLVM function. > if (...) { > B.addAttribute(llvm::Attribute::OptNone); > F.removeFnAttr(llvm::ATtribute::OptForSize); > ... > } else if (...) { > B.addAttribute(llvm::Attribute::OptForSize); > } else if (...) } > ... > } > > // Add the inlining control attributes. > if (...) { > > } else if (...) { > > } else if (...) { > > } > > // Add specific semantic attributes such as 'naked' and 'cold'. > if (D->hasAttr()) { > B.addAttribute(...::Naked); > } > if (D->hasAttr()) { > ... > } > > Even though this means testing the Clang-level attributes multiple times, I > think it'll be much less confusing to read and update. We're actually already > really close. just need to hoist the non-inlining bits of optnone out, sink > the naked attribute down, and hoist the cold sizeopt up. > Since you answer below the example I gave above, I just want to be sure you understand that the attributes for the *declarations* are not even handled in the same file right? The "state machine" is cross TU here, and it seems to me what you're describing would require some refactoring between CGCall.cpp and CodeGenModule.cpp. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
chandlerc added inline comments. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:910-912 // OptimizeNone wins over OptimizeForSize and MinSize. F->removeFnAttr(llvm::Attribute::OptimizeForSize); F->removeFnAttr(llvm::Attribute::MinSize); mehdi_amini wrote: > probinson wrote: > > mehdi_amini wrote: > > > chandlerc wrote: > > > > Is this still at all correct? Why? it seems pretty confusing especially > > > > in conjunction with the code below. > > > > > > > > > > > > I think this may force you to either: > > > > a) stop early-marking of -Os and -Oz flags with these attributes > > > > (early: prior to calling this routine) and handling all of the -O flag > > > > synthesized attributes here, or > > > > b) set optnone for -O0 wher ewe set optsize for -Os and friends, and > > > > then remove it where necessary here. > > > > > > > > I don't have any strong opinion about a vs. b. > > > I believe it is still correct: during Os/Oz we reach this point and > > > figure that there is `__attribute__((optnone))` in the *source* (not > > > `-O0`), we remove the attributes, nothing changes. Did I miss something? > > > > > Hmmm the Os/Oz attributes are added in CGCall.cpp, and are guarded with a > > check on the presence of the Optnone source attribute, so if the Optnone > > source attribute is present we should never see these. And Os/Oz set > > OptimizationLevel to 2, which is not zero, so we won't come through here > > for ShouldAddOptNone reasons either. > > Therefore these 'remove' calls should be no-ops and could be removed. (For > > paranoia you could turn them into asserts, and do some experimenting to see > > whether I'm confused about how this all fits together.) > The verifier is already complaining if we get this wrong, and indeed it > complains if I'm removing these. > See clang/test/CodeGen/attr-func-def.c: > > ``` > > int foo1(int); > > int foo2(int a) { > return foo1(a + 2); > } > > __attribute__((optnone)) > int foo1(int a) { > return a + 1; > } > ``` > > Here we have the attributed optnone on the definition but not the > declaration, and the check you're mentioning in CGCalls is only applying to > the declaration. This is all still incredibly confusing code. I think what would make me happy with this is to have a separate section for each mutually exclusive group of LLVM attributes added to the function. so: // Add the relevant optimization level to the LLVM function. if (...) { B.addAttribute(llvm::Attribute::OptNone); F.removeFnAttr(llvm::ATtribute::OptForSize); ... } else if (...) { B.addAttribute(llvm::Attribute::OptForSize); } else if (...) } ... } // Add the inlining control attributes. if (...) { } else if (...) { } else if (...) { } // Add specific semantic attributes such as 'naked' and 'cold'. if (D->hasAttr()) { B.addAttribute(...::Naked); } if (D->hasAttr()) { ... } Even though this means testing the Clang-level attributes multiple times, I think it'll be much less confusing to read and update. We're actually already really close. just need to hoist the non-inlining bits of optnone out, sink the naked attribute down, and hoist the cold sizeopt up. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini updated this revision to Diff 83480. mehdi_amini added a comment. Forgot to update test/CodeGen/attr-naked.c https://reviews.llvm.org/D28404 Files: clang/include/clang/Driver/CC1Options.td clang/include/clang/Frontend/CodeGenOptions.def clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGen/aarch64-neon-2velem.c clang/test/CodeGen/aarch64-neon-3v.c clang/test/CodeGen/aarch64-neon-across.c clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c clang/test/CodeGen/aarch64-neon-fma.c clang/test/CodeGen/aarch64-neon-intrinsics.c clang/test/CodeGen/aarch64-neon-ldst-one.c clang/test/CodeGen/aarch64-neon-misc.c clang/test/CodeGen/aarch64-neon-perm.c clang/test/CodeGen/aarch64-neon-scalar-copy.c clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c clang/test/CodeGen/aarch64-neon-shifts.c clang/test/CodeGen/aarch64-neon-tbl.c clang/test/CodeGen/aarch64-neon-vcombine.c clang/test/CodeGen/aarch64-neon-vget-hilo.c clang/test/CodeGen/aarch64-neon-vget.c clang/test/CodeGen/aarch64-poly64.c clang/test/CodeGen/address-safety-attr-kasan.cpp clang/test/CodeGen/address-safety-attr.cpp clang/test/CodeGen/arm-crc32.c clang/test/CodeGen/arm-neon-directed-rounding.c clang/test/CodeGen/arm-neon-fma.c clang/test/CodeGen/arm-neon-numeric-maxmin.c clang/test/CodeGen/arm-neon-vcvtX.c clang/test/CodeGen/arm-neon-vget.c clang/test/CodeGen/arm64-lanes.c clang/test/CodeGen/arm64_vcopy.c clang/test/CodeGen/arm64_vdupq_n_f64.c clang/test/CodeGen/attr-coldhot.c clang/test/CodeGen/attr-naked.c clang/test/CodeGen/builtins-arm-exclusive.c clang/test/CodeGen/builtins-arm.c clang/test/CodeGen/builtins-arm64.c clang/test/CodeGen/noduplicate-cxx11-test.cpp clang/test/CodeGen/pragma-weak.c clang/test/CodeGen/unwind-attr.c clang/test/CodeGenCXX/apple-kext-indirect-virtual-dtor-call.cpp clang/test/CodeGenCXX/apple-kext-no-staticinit-section.cpp clang/test/CodeGenCXX/debug-info-global-ctor-dtor.cpp clang/test/CodeGenCXX/optnone-templates.cpp clang/test/CodeGenCXX/static-init-wasm.cpp clang/test/CodeGenCXX/thunks.cpp clang/test/CodeGenObjC/gnu-exceptions.m clang/test/CodeGenOpenCL/amdgpu-attrs.cl clang/test/Driver/darwin-iphone-defaults.m Index: clang/test/Driver/darwin-iphone-defaults.m === --- clang/test/Driver/darwin-iphone-defaults.m +++ clang/test/Driver/darwin-iphone-defaults.m @@ -26,4 +26,4 @@ [I1 alloc]; } -// CHECK: attributes [[F0]] = { noinline ssp{{.*}} } +// CHECK: attributes [[F0]] = { noinline optnone ssp{{.*}} } Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl === --- clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -141,26 +141,26 @@ // CHECK-NOT: "amdgpu-num-sgpr"="0" // CHECK-NOT: "amdgpu-num-vgpr"="0" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" -// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" +// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added inline comments. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:910-912 // OptimizeNone wins over OptimizeForSize and MinSize. F->removeFnAttr(llvm::Attribute::OptimizeForSize); F->removeFnAttr(llvm::Attribute::MinSize); probinson wrote: > mehdi_amini wrote: > > chandlerc wrote: > > > Is this still at all correct? Why? it seems pretty confusing especially > > > in conjunction with the code below. > > > > > > > > > I think this may force you to either: > > > a) stop early-marking of -Os and -Oz flags with these attributes (early: > > > prior to calling this routine) and handling all of the -O flag > > > synthesized attributes here, or > > > b) set optnone for -O0 wher ewe set optsize for -Os and friends, and then > > > remove it where necessary here. > > > > > > I don't have any strong opinion about a vs. b. > > I believe it is still correct: during Os/Oz we reach this point and figure > > that there is `__attribute__((optnone))` in the *source* (not `-O0`), we > > remove the attributes, nothing changes. Did I miss something? > > > Hmmm the Os/Oz attributes are added in CGCall.cpp, and are guarded with a > check on the presence of the Optnone source attribute, so if the Optnone > source attribute is present we should never see these. And Os/Oz set > OptimizationLevel to 2, which is not zero, so we won't come through here for > ShouldAddOptNone reasons either. > Therefore these 'remove' calls should be no-ops and could be removed. (For > paranoia you could turn them into asserts, and do some experimenting to see > whether I'm confused about how this all fits together.) The verifier is already complaining if we get this wrong, and indeed it complains if I'm removing these. See clang/test/CodeGen/attr-func-def.c: ``` int foo1(int); int foo2(int a) { return foo1(a + 2); } __attribute__((optnone)) int foo1(int a) { return a + 1; } ``` Here we have the attributed optnone on the definition but not the declaration, and the check you're mentioning in CGCalls is only applying to the declaration. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added inline comments. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:910-912 // OptimizeNone wins over OptimizeForSize and MinSize. F->removeFnAttr(llvm::Attribute::OptimizeForSize); F->removeFnAttr(llvm::Attribute::MinSize); mehdi_amini wrote: > chandlerc wrote: > > Is this still at all correct? Why? it seems pretty confusing especially in > > conjunction with the code below. > > > > > > I think this may force you to either: > > a) stop early-marking of -Os and -Oz flags with these attributes (early: > > prior to calling this routine) and handling all of the -O flag synthesized > > attributes here, or > > b) set optnone for -O0 wher ewe set optsize for -Os and friends, and then > > remove it where necessary here. > > > > I don't have any strong opinion about a vs. b. > I believe it is still correct: during Os/Oz we reach this point and figure > that there is `__attribute__((optnone))` in the *source* (not `-O0`), we > remove the attributes, nothing changes. Did I miss something? > Hmmm the Os/Oz attributes are added in CGCall.cpp, and are guarded with a check on the presence of the Optnone source attribute, so if the Optnone source attribute is present we should never see these. And Os/Oz set OptimizationLevel to 2, which is not zero, so we won't come through here for ShouldAddOptNone reasons either. Therefore these 'remove' calls should be no-ops and could be removed. (For paranoia you could turn them into asserts, and do some experimenting to see whether I'm confused about how this all fits together.) https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini updated this revision to Diff 83468. mehdi_amini added a comment. Address Paul's comment (remove useless block and add period to end comment) https://reviews.llvm.org/D28404 Files: clang/include/clang/Driver/CC1Options.td clang/include/clang/Frontend/CodeGenOptions.def clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGen/aarch64-neon-2velem.c clang/test/CodeGen/aarch64-neon-3v.c clang/test/CodeGen/aarch64-neon-across.c clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c clang/test/CodeGen/aarch64-neon-fma.c clang/test/CodeGen/aarch64-neon-intrinsics.c clang/test/CodeGen/aarch64-neon-ldst-one.c clang/test/CodeGen/aarch64-neon-misc.c clang/test/CodeGen/aarch64-neon-perm.c clang/test/CodeGen/aarch64-neon-scalar-copy.c clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c clang/test/CodeGen/aarch64-neon-shifts.c clang/test/CodeGen/aarch64-neon-tbl.c clang/test/CodeGen/aarch64-neon-vcombine.c clang/test/CodeGen/aarch64-neon-vget-hilo.c clang/test/CodeGen/aarch64-neon-vget.c clang/test/CodeGen/aarch64-poly64.c clang/test/CodeGen/address-safety-attr-kasan.cpp clang/test/CodeGen/address-safety-attr.cpp clang/test/CodeGen/arm-crc32.c clang/test/CodeGen/arm-neon-directed-rounding.c clang/test/CodeGen/arm-neon-fma.c clang/test/CodeGen/arm-neon-numeric-maxmin.c clang/test/CodeGen/arm-neon-vcvtX.c clang/test/CodeGen/arm-neon-vget.c clang/test/CodeGen/arm64-lanes.c clang/test/CodeGen/arm64_vcopy.c clang/test/CodeGen/arm64_vdupq_n_f64.c clang/test/CodeGen/attr-coldhot.c clang/test/CodeGen/builtins-arm-exclusive.c clang/test/CodeGen/builtins-arm.c clang/test/CodeGen/builtins-arm64.c clang/test/CodeGen/noduplicate-cxx11-test.cpp clang/test/CodeGen/pragma-weak.c clang/test/CodeGen/unwind-attr.c clang/test/CodeGenCXX/apple-kext-indirect-virtual-dtor-call.cpp clang/test/CodeGenCXX/apple-kext-no-staticinit-section.cpp clang/test/CodeGenCXX/debug-info-global-ctor-dtor.cpp clang/test/CodeGenCXX/optnone-templates.cpp clang/test/CodeGenCXX/static-init-wasm.cpp clang/test/CodeGenCXX/thunks.cpp clang/test/CodeGenObjC/gnu-exceptions.m clang/test/CodeGenOpenCL/amdgpu-attrs.cl clang/test/Driver/darwin-iphone-defaults.m Index: clang/test/Driver/darwin-iphone-defaults.m === --- clang/test/Driver/darwin-iphone-defaults.m +++ clang/test/Driver/darwin-iphone-defaults.m @@ -26,4 +26,4 @@ [I1 alloc]; } -// CHECK: attributes [[F0]] = { noinline ssp{{.*}} } +// CHECK: attributes [[F0]] = { noinline optnone ssp{{.*}} } Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl === --- clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -141,26 +141,26 @@ // CHECK-NOT: "amdgpu-num-sgpr"="0" // CHECK-NOT: "amdgpu-num-vgpr"="0" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" -// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" +// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini marked 2 inline comments as done. mehdi_amini added inline comments. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:900 + ShouldAddOptNone &= !D->hasAttr(); + if (ShouldAddOptNone) { +B.addAttribute(llvm::Attribute::OptimizeNone); probinson wrote: > This block is redundant now? The same things are added in the next if block. Oh right! Will remove, thanks! https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added inline comments. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:896 + !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; + // We can't add optnone in the following cases, it won't pass the verifier + ShouldAddOptNone &= !D->hasAttr(); Period at the end of a comment. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:900 + ShouldAddOptNone &= !D->hasAttr(); + if (ShouldAddOptNone) { +B.addAttribute(llvm::Attribute::OptimizeNone); This block is redundant now? The same things are added in the next if block. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini marked 6 inline comments as done. mehdi_amini added inline comments. Comment at: clang/lib/CodeGen/CGOpenMPRuntime.cpp:760-762 Fn->removeFnAttr(llvm::Attribute::NoInline); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::AlwaysInline); chandlerc wrote: > At point where we are in numerous places doing 3 coupled calls, we should add > some routine to do this... Maybe we should have when I added the noinline bit. > > I don't have a good idea of where best to do this -- as part of or as an > alternative to `SetInternalFunctionAttributes`? Something else? > > I'm imagining something like `SetAlwaysInlinedRuntimeFunctionAttributes` or > something. Need a clang IRGen person to help push the organization in the > right direction. Yes some refactoring of all this custom handling would be welcome. I'll take any pointer to how to do it in clang (I'm not familiar enough with clang). Comment at: clang/lib/CodeGen/CodeGenModule.cpp:892 + // -O0 adds the optnone attribute, except if specific attributes prevents it. + bool ShouldAddOptNone = chandlerc wrote: > attributes prevents -> attributes prevent > > ACtually, what do you mean by attributes here? Or should this comment instead > go below, where we start to branch on the actual 'hasAttr' calls? > > After reading below, I understand better. Maybe: > > // Track whether we need to add the optnone LLVM attribute, > // starting with the default for this optimization level. Actually I instead moved it all together. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:910-912 // OptimizeNone wins over OptimizeForSize and MinSize. F->removeFnAttr(llvm::Attribute::OptimizeForSize); F->removeFnAttr(llvm::Attribute::MinSize); chandlerc wrote: > Is this still at all correct? Why? it seems pretty confusing especially in > conjunction with the code below. > > > I think this may force you to either: > a) stop early-marking of -Os and -Oz flags with these attributes (early: > prior to calling this routine) and handling all of the -O flag synthesized > attributes here, or > b) set optnone for -O0 wher ewe set optsize for -Os and friends, and then > remove it where necessary here. > > I don't have any strong opinion about a vs. b. I believe it is still correct: during Os/Oz we reach this point and figure that there is `__attribute__((optnone))` in the *source* (not `-O0`), we remove the attributes, nothing changes. Did I miss something? Comment at: clang/lib/CodeGen/CodeGenModule.cpp:962 + ShouldAddOptNone &= !D->hasAttr(); + ShouldAddOptNone &= !D->hasAttr(); + ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline); chandlerc wrote: > why is optnone incompatible with *cold* The source attribute "Cold" adds `llvm::Attribute::OptimizeForSize` even at O0 right now, I changed this and now we emit `optnone` at O0 in this case. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini updated this revision to Diff 83459. mehdi_amini added a comment. Address comments: reorganize the way ShouldAddOptNone is handled, hopefully make it more easy to track. Also after talking with Chandler on IRC, the source attribute "cold" does not add the LLVM IR attribute "optsize" at O0, we add "optnone" instead. https://reviews.llvm.org/D28404 Files: clang/include/clang/Driver/CC1Options.td clang/include/clang/Frontend/CodeGenOptions.def clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGen/aarch64-neon-2velem.c clang/test/CodeGen/aarch64-neon-3v.c clang/test/CodeGen/aarch64-neon-across.c clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c clang/test/CodeGen/aarch64-neon-fma.c clang/test/CodeGen/aarch64-neon-intrinsics.c clang/test/CodeGen/aarch64-neon-ldst-one.c clang/test/CodeGen/aarch64-neon-misc.c clang/test/CodeGen/aarch64-neon-perm.c clang/test/CodeGen/aarch64-neon-scalar-copy.c clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c clang/test/CodeGen/aarch64-neon-shifts.c clang/test/CodeGen/aarch64-neon-tbl.c clang/test/CodeGen/aarch64-neon-vcombine.c clang/test/CodeGen/aarch64-neon-vget-hilo.c clang/test/CodeGen/aarch64-neon-vget.c clang/test/CodeGen/aarch64-poly64.c clang/test/CodeGen/address-safety-attr-kasan.cpp clang/test/CodeGen/address-safety-attr.cpp clang/test/CodeGen/arm-crc32.c clang/test/CodeGen/arm-neon-directed-rounding.c clang/test/CodeGen/arm-neon-fma.c clang/test/CodeGen/arm-neon-numeric-maxmin.c clang/test/CodeGen/arm-neon-vcvtX.c clang/test/CodeGen/arm-neon-vget.c clang/test/CodeGen/arm64-lanes.c clang/test/CodeGen/arm64_vcopy.c clang/test/CodeGen/arm64_vdupq_n_f64.c clang/test/CodeGen/attr-coldhot.c clang/test/CodeGen/builtins-arm-exclusive.c clang/test/CodeGen/builtins-arm.c clang/test/CodeGen/builtins-arm64.c clang/test/CodeGen/noduplicate-cxx11-test.cpp clang/test/CodeGen/pragma-weak.c clang/test/CodeGen/unwind-attr.c clang/test/CodeGenCXX/apple-kext-indirect-virtual-dtor-call.cpp clang/test/CodeGenCXX/apple-kext-no-staticinit-section.cpp clang/test/CodeGenCXX/debug-info-global-ctor-dtor.cpp clang/test/CodeGenCXX/optnone-templates.cpp clang/test/CodeGenCXX/static-init-wasm.cpp clang/test/CodeGenCXX/thunks.cpp clang/test/CodeGenObjC/gnu-exceptions.m clang/test/CodeGenOpenCL/amdgpu-attrs.cl clang/test/Driver/darwin-iphone-defaults.m Index: clang/test/Driver/darwin-iphone-defaults.m === --- clang/test/Driver/darwin-iphone-defaults.m +++ clang/test/Driver/darwin-iphone-defaults.m @@ -26,4 +26,4 @@ [I1 alloc]; } -// CHECK: attributes [[F0]] = { noinline ssp{{.*}} } +// CHECK: attributes [[F0]] = { noinline optnone ssp{{.*}} } Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl === --- clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -141,26 +141,26 @@ // CHECK-NOT: "amdgpu-num-sgpr"="0" // CHECK-NOT: "amdgpu-num-vgpr"="0" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" -// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" +// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added inline comments. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:962 + ShouldAddOptNone &= !D->hasAttr(); + ShouldAddOptNone &= !D->hasAttr(); + ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline); chandlerc wrote: > why is optnone incompatible with *cold* Because cold implies OptimizeForSize (just above this). I take no position on whether that is reasonable. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
chandlerc added inline comments. Comment at: clang/lib/CodeGen/CGOpenMPRuntime.cpp:760-762 Fn->removeFnAttr(llvm::Attribute::NoInline); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::AlwaysInline); At point where we are in numerous places doing 3 coupled calls, we should add some routine to do this... Maybe we should have when I added the noinline bit. I don't have a good idea of where best to do this -- as part of or as an alternative to `SetInternalFunctionAttributes`? Something else? I'm imagining something like `SetAlwaysInlinedRuntimeFunctionAttributes` or something. Need a clang IRGen person to help push the organization in the right direction. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:892 + // -O0 adds the optnone attribute, except if specific attributes prevents it. + bool ShouldAddOptNone = attributes prevents -> attributes prevent ACtually, what do you mean by attributes here? Or should this comment instead go below, where we start to branch on the actual 'hasAttr' calls? After reading below, I understand better. Maybe: // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:899-900 -// OptimizeNone implies noinline; we should not be inlining such functions. +// OptimizeNone implies noinline; we should not be inlining such +// functions. B.addAttribute(llvm::Attribute::NoInline); Unrelated (and unnecessary) formatting change? Comment at: clang/lib/CodeGen/CodeGenModule.cpp:910-912 // OptimizeNone wins over OptimizeForSize and MinSize. F->removeFnAttr(llvm::Attribute::OptimizeForSize); F->removeFnAttr(llvm::Attribute::MinSize); Is this still at all correct? Why? it seems pretty confusing especially in conjunction with the code below. I think this may force you to either: a) stop early-marking of -Os and -Oz flags with these attributes (early: prior to calling this routine) and handling all of the -O flag synthesized attributes here, or b) set optnone for -O0 wher ewe set optsize for -Os and friends, and then remove it where necessary here. I don't have any strong opinion about a vs. b. Comment at: clang/lib/CodeGen/CodeGenModule.cpp:962 + ShouldAddOptNone &= !D->hasAttr(); + ShouldAddOptNone &= !D->hasAttr(); + ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline); why is optnone incompatible with *cold* https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#638350, @mehdi_amini wrote: > In https://reviews.llvm.org/D28404#638299, @probinson wrote: > > > In https://reviews.llvm.org/D28404#638221, @mehdi_amini wrote: > > > > > In https://reviews.llvm.org/D28404#638217, @probinson wrote: > > > > > > > The patch as-is obviously has a massive testing cost, and it's easy to > > > > imagine people being tripped up by this in the future. > > > > > > > > > Can you clarify what massive testing cost you're referring to? > > > > > > Well, you just had to modify around 50 tests, and I'd expect some future > > tests to have to deal with it too. Maybe "massive" is overstating it but > > it seemed like an unusually large number. > > > There are two things: > > - tests are modified: when adding a new option, it does not seems unusual to > me 50 seems rather more than usual, but whatever. Granted it's not hundreds. > - what impact on future testing. I still don't see any of this future > "testing cost" you're referring to right now. Maybe I worry too much. I am getting a slightly different set of test failures than you did though. I get these failures: CodeGen/aarch64-neon-extract.c CodeGen/aarch64-poly128.c CodeGen/arm-neon-shifts.c CodeGen/arm64-crc32.c And I don't get these failures: CodeGenCXX/apple-kext-indirect-virtual-dtor-call.cpp CodeGenCXX/apple-kext-no-staticinit-section.cpp CodeGenCXX/debug-info-global-ctor-dtor.cpp Comment at: clang/lib/CodeGen/CodeGenModule.cpp:900 +// OptimizeNone implies noinline; we should not be inlining such +// functions. B.addAttribute(llvm::Attribute::NoInline); I'd set ShouldAddOptNone = false here, as it's already explicit. Comment at: clang/test/CodeGen/aarch64-neon-2velem.c:1 -// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s Option specified twice. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini updated this revision to Diff 83441. mehdi_amini added a comment. Herald added subscribers: dschuff, jfb. Fix one more conflicts with always_inline, and change some test check lines https://reviews.llvm.org/D28404 Files: clang/include/clang/Driver/CC1Options.td clang/include/clang/Frontend/CodeGenOptions.def clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGen/aarch64-neon-2velem.c clang/test/CodeGen/aarch64-neon-3v.c clang/test/CodeGen/aarch64-neon-across.c clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c clang/test/CodeGen/aarch64-neon-fma.c clang/test/CodeGen/aarch64-neon-intrinsics.c clang/test/CodeGen/aarch64-neon-ldst-one.c clang/test/CodeGen/aarch64-neon-misc.c clang/test/CodeGen/aarch64-neon-perm.c clang/test/CodeGen/aarch64-neon-scalar-copy.c clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c clang/test/CodeGen/aarch64-neon-shifts.c clang/test/CodeGen/aarch64-neon-tbl.c clang/test/CodeGen/aarch64-neon-vcombine.c clang/test/CodeGen/aarch64-neon-vget-hilo.c clang/test/CodeGen/aarch64-neon-vget.c clang/test/CodeGen/aarch64-poly64.c clang/test/CodeGen/address-safety-attr-kasan.cpp clang/test/CodeGen/address-safety-attr.cpp clang/test/CodeGen/arm-crc32.c clang/test/CodeGen/arm-neon-directed-rounding.c clang/test/CodeGen/arm-neon-fma.c clang/test/CodeGen/arm-neon-numeric-maxmin.c clang/test/CodeGen/arm-neon-vcvtX.c clang/test/CodeGen/arm-neon-vget.c clang/test/CodeGen/arm64-lanes.c clang/test/CodeGen/arm64_vcopy.c clang/test/CodeGen/arm64_vdupq_n_f64.c clang/test/CodeGen/builtins-arm-exclusive.c clang/test/CodeGen/builtins-arm.c clang/test/CodeGen/builtins-arm64.c clang/test/CodeGen/noduplicate-cxx11-test.cpp clang/test/CodeGen/pragma-weak.c clang/test/CodeGen/unwind-attr.c clang/test/CodeGenCXX/apple-kext-indirect-virtual-dtor-call.cpp clang/test/CodeGenCXX/apple-kext-no-staticinit-section.cpp clang/test/CodeGenCXX/debug-info-global-ctor-dtor.cpp clang/test/CodeGenCXX/optnone-templates.cpp clang/test/CodeGenCXX/static-init-wasm.cpp clang/test/CodeGenCXX/thunks.cpp clang/test/CodeGenObjC/gnu-exceptions.m clang/test/CodeGenOpenCL/amdgpu-attrs.cl clang/test/Driver/darwin-iphone-defaults.m Index: clang/test/Driver/darwin-iphone-defaults.m === --- clang/test/Driver/darwin-iphone-defaults.m +++ clang/test/Driver/darwin-iphone-defaults.m @@ -26,4 +26,4 @@ [I1 alloc]; } -// CHECK: attributes [[F0]] = { noinline ssp{{.*}} } +// CHECK: attributes [[F0]] = { noinline optnone ssp{{.*}} } Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl === --- clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -141,26 +141,26 @@ // CHECK-NOT: "amdgpu-num-sgpr"="0" // CHECK-NOT: "amdgpu-num-vgpr"="0" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" -// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" +// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = {
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini updated this revision to Diff 83433. mehdi_amini added a comment. Herald added a subscriber: jholewinski. Fix minsize issue (conditional was reversed) https://reviews.llvm.org/D28404 Files: clang/include/clang/Driver/CC1Options.td clang/include/clang/Frontend/CodeGenOptions.def clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGen/aarch64-neon-2velem.c clang/test/CodeGen/aarch64-neon-3v.c clang/test/CodeGen/aarch64-neon-across.c clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c clang/test/CodeGen/aarch64-neon-fma.c clang/test/CodeGen/aarch64-neon-intrinsics.c clang/test/CodeGen/aarch64-neon-ldst-one.c clang/test/CodeGen/aarch64-neon-misc.c clang/test/CodeGen/aarch64-neon-perm.c clang/test/CodeGen/aarch64-neon-scalar-copy.c clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c clang/test/CodeGen/aarch64-neon-shifts.c clang/test/CodeGen/aarch64-neon-tbl.c clang/test/CodeGen/aarch64-neon-vcombine.c clang/test/CodeGen/aarch64-neon-vget-hilo.c clang/test/CodeGen/aarch64-neon-vget.c clang/test/CodeGen/aarch64-poly64.c clang/test/CodeGen/address-safety-attr-kasan.cpp clang/test/CodeGen/address-safety-attr.cpp clang/test/CodeGen/arm-crc32.c clang/test/CodeGen/arm-neon-directed-rounding.c clang/test/CodeGen/arm-neon-fma.c clang/test/CodeGen/arm-neon-numeric-maxmin.c clang/test/CodeGen/arm-neon-vcvtX.c clang/test/CodeGen/arm-neon-vget.c clang/test/CodeGen/arm64-lanes.c clang/test/CodeGen/arm64_vcopy.c clang/test/CodeGen/arm64_vdupq_n_f64.c clang/test/CodeGen/attr-coldhot.c clang/test/CodeGen/builtins-arm-exclusive.c clang/test/CodeGen/builtins-arm.c clang/test/CodeGen/builtins-arm64.c clang/test/CodeGen/noduplicate-cxx11-test.cpp clang/test/CodeGen/pragma-weak.c clang/test/CodeGen/unwind-attr.c clang/test/CodeGenCXX/apple-kext-indirect-virtual-dtor-call.cpp clang/test/CodeGenCXX/apple-kext-linkage.cpp clang/test/CodeGenCXX/apple-kext-no-staticinit-section.cpp clang/test/CodeGenCXX/debug-info-global-ctor-dtor.cpp clang/test/CodeGenCXX/optnone-templates.cpp clang/test/CodeGenCXX/static-init-wasm.cpp clang/test/CodeGenCXX/thunks.cpp clang/test/CodeGenObjC/gnu-exceptions.m clang/test/CodeGenOpenCL/amdgpu-attrs.cl clang/test/Driver/darwin-iphone-defaults.m Index: clang/test/Driver/darwin-iphone-defaults.m === --- clang/test/Driver/darwin-iphone-defaults.m +++ clang/test/Driver/darwin-iphone-defaults.m @@ -26,4 +26,4 @@ [I1 alloc]; } -// CHECK: attributes [[F0]] = { noinline ssp{{.*}} } +// CHECK: attributes [[F0]] = { noinline optnone ssp{{.*}} } Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl === --- clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -disable-O0-optnone -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -disable-O0-optnone -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s __attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics kernel void flat_work_group_size_0_0() {} Index: clang/test/CodeGenObjC/gnu-exceptions.m === --- clang/test/CodeGenObjC/gnu-exceptions.m +++ clang/test/CodeGenObjC/gnu-exceptions.m @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -fexceptions -fobjc-exceptions -fobjc-runtime=gcc -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-freebsd -emit-llvm -fexceptions -fobjc-exceptions -fobjc-runtime=gnustep-1.7 -o - %s | FileCheck -check-prefix=NEW-ABI %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -disable-O0-optnone -emit-llvm -fexceptions -fobjc-exceptions -fobjc-runtime=gcc -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-freebsd -disable-O0-optnone -emit-llvm -fexceptions -fobjc-exceptions -fobjc-runtime=gnustep-1.7 -o - %s | FileCheck -check-prefix=NEW-ABI %s void opaque(void); void log(int i); Index: clang/test/CodeGenCXX/thunks.cpp === --- clang/test/CodeGenCXX/thunks.cpp +++ clang/test/CodeGenCXX/thunks.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONOPT %s -// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes | FileCheck --check-prefix=CHECK
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#638299, @probinson wrote: > In https://reviews.llvm.org/D28404#638221, @mehdi_amini wrote: > > > In https://reviews.llvm.org/D28404#638217, @probinson wrote: > > > > > The patch as-is obviously has a massive testing cost, and it's easy to > > > imagine people being tripped up by this in the future. > > > > > > Can you clarify what massive testing cost you're referring to? > > > Well, you just had to modify around 50 tests, and I'd expect some future > tests to have to deal with it too. Maybe "massive" is overstating it but it > seemed like an unusually large number. There are two things: - tests are modified: when adding a new option, it does not seems unusual to me - what impact on future testing. I still don't see any of this future "testing cost" you're referring to right now. > I don't know that just slapping the option on all these tests is really the > most appropriate fix, either, in some cases. I'll look at it more. For instance the ARM test are relying on piping the output of clang to mem2reg to clean the IR and have simpler check patterns (I assume). This is not compatible with optnone obviously. On the other hand I don't want to update the check lines for > 2 lines in testsclang/test/CodeGen/aarch64-neon-intrinsics.c just to save passing an option. It's likely that some of these test could have their check line adapted, but I didn't see much interest in doing this. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. In https://reviews.llvm.org/D28404#638221, @mehdi_amini wrote: > In https://reviews.llvm.org/D28404#638217, @probinson wrote: > > > The patch as-is obviously has a massive testing cost, and it's easy to > > imagine people being tripped up by this in the future. > > > Can you clarify what massive testing cost you're referring to? Well, you just had to modify around 50 tests, and I'd expect some future tests to have to deal with it too. Maybe "massive" is overstating it but it seemed like an unusually large number. I don't know that just slapping the option on all these tests is really the most appropriate fix, either, in some cases. I'll look at it more. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini added a comment. In https://reviews.llvm.org/D28404#638217, @probinson wrote: > Maybe instead, pass a flag to enable setting optnone on everything when the > driver sees `-O0 -flto`? I'm not fond of this: limiting discrepancy between LTO and non-LTO reduces the LTO specific bugs and reduces the maintenance of LTO. > The patch as-is obviously has a massive testing cost, and it's easy to > imagine people being tripped up by this in the future. Can you clarify what massive testing cost you're referring to? https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
probinson added a comment. Maybe instead, pass a flag to enable setting optnone on everything when the driver sees `-O0 -flto`? The patch as-is obviously has a massive testing cost, and it's easy to imagine people being tripped up by this in the future. https://reviews.llvm.org/D28404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini updated this revision to Diff 83391. mehdi_amini added a comment. Herald added a subscriber: wdng. Remove spurious change https://reviews.llvm.org/D28404 Files: clang/include/clang/Driver/CC1Options.td clang/include/clang/Frontend/CodeGenOptions.def clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGen/aarch64-neon-2velem.c clang/test/CodeGen/aarch64-neon-3v.c clang/test/CodeGen/aarch64-neon-across.c clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c clang/test/CodeGen/aarch64-neon-fma.c clang/test/CodeGen/aarch64-neon-intrinsics.c clang/test/CodeGen/aarch64-neon-ldst-one.c clang/test/CodeGen/aarch64-neon-misc.c clang/test/CodeGen/aarch64-neon-perm.c clang/test/CodeGen/aarch64-neon-scalar-copy.c clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c clang/test/CodeGen/aarch64-neon-shifts.c clang/test/CodeGen/aarch64-neon-tbl.c clang/test/CodeGen/aarch64-neon-vcombine.c clang/test/CodeGen/aarch64-neon-vget-hilo.c clang/test/CodeGen/aarch64-neon-vget.c clang/test/CodeGen/aarch64-poly64.c clang/test/CodeGen/address-safety-attr-kasan.cpp clang/test/CodeGen/address-safety-attr.cpp clang/test/CodeGen/arm-crc32.c clang/test/CodeGen/arm-neon-directed-rounding.c clang/test/CodeGen/arm-neon-fma.c clang/test/CodeGen/arm-neon-numeric-maxmin.c clang/test/CodeGen/arm-neon-vcvtX.c clang/test/CodeGen/arm-neon-vget.c clang/test/CodeGen/arm64-lanes.c clang/test/CodeGen/arm64_vcopy.c clang/test/CodeGen/arm64_vdupq_n_f64.c clang/test/CodeGen/attr-coldhot.c clang/test/CodeGen/builtins-arm-exclusive.c clang/test/CodeGen/builtins-arm.c clang/test/CodeGen/builtins-arm64.c clang/test/CodeGen/noduplicate-cxx11-test.cpp clang/test/CodeGen/pragma-weak.c clang/test/CodeGen/unwind-attr.c clang/test/CodeGenCXX/apple-kext-indirect-virtual-dtor-call.cpp clang/test/CodeGenCXX/apple-kext-linkage.cpp clang/test/CodeGenCXX/apple-kext-no-staticinit-section.cpp clang/test/CodeGenCXX/debug-info-global-ctor-dtor.cpp clang/test/CodeGenCXX/optnone-templates.cpp clang/test/CodeGenCXX/static-init-wasm.cpp clang/test/CodeGenCXX/thunks.cpp clang/test/CodeGenObjC/attr-minsize.m clang/test/CodeGenObjC/gnu-exceptions.m clang/test/CodeGenOpenCL/amdgpu-attrs.cl clang/test/Driver/darwin-iphone-defaults.m clang/test/OpenMP/nvptx_teams_codegen.cpp Index: clang/test/OpenMP/nvptx_teams_codegen.cpp === --- clang/test/OpenMP/nvptx_teams_codegen.cpp +++ clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -1,8 +1,8 @@ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -disable-O0-optnone -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -disable-O0-optnone -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -disable-O0-optnone -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -disable-O0-optnone -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -60,10 +60,10 @@ #endif // CK1 // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 -// RUN: %clang_cc1 -DCK2 -verify
[PATCH] D28404: IRGen: Add optnone attribute on function during O0
mehdi_amini created this revision. mehdi_amini added reviewers: chandlerc, rsmith. mehdi_amini added subscribers: cfe-commits, dexonsmith. Herald added a reviewer: tstellarAMD. Herald added a subscriber: nhaehnle. Amongst other, this will help LTO to correctly handle/honor files compiled with O0, helping debugging failures. It also seems in line with how we handle other options, like how `-fnoinline` add the appropriate attribute as well. https://reviews.llvm.org/D28404 Files: clang/include/clang/Driver/CC1Options.td clang/include/clang/Frontend/CodeGenOptions.def clang/lib/CodeGen/CGCall.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGen/aarch64-neon-2velem.c clang/test/CodeGen/aarch64-neon-3v.c clang/test/CodeGen/aarch64-neon-across.c clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c clang/test/CodeGen/aarch64-neon-fma.c clang/test/CodeGen/aarch64-neon-intrinsics.c clang/test/CodeGen/aarch64-neon-ldst-one.c clang/test/CodeGen/aarch64-neon-misc.c clang/test/CodeGen/aarch64-neon-perm.c clang/test/CodeGen/aarch64-neon-scalar-copy.c clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c clang/test/CodeGen/aarch64-neon-shifts.c clang/test/CodeGen/aarch64-neon-tbl.c clang/test/CodeGen/aarch64-neon-vcombine.c clang/test/CodeGen/aarch64-neon-vget-hilo.c clang/test/CodeGen/aarch64-neon-vget.c clang/test/CodeGen/aarch64-poly64.c clang/test/CodeGen/address-safety-attr-kasan.cpp clang/test/CodeGen/address-safety-attr.cpp clang/test/CodeGen/arm-crc32.c clang/test/CodeGen/arm-neon-directed-rounding.c clang/test/CodeGen/arm-neon-fma.c clang/test/CodeGen/arm-neon-numeric-maxmin.c clang/test/CodeGen/arm-neon-vcvtX.c clang/test/CodeGen/arm-neon-vget.c clang/test/CodeGen/arm64-lanes.c clang/test/CodeGen/arm64_vcopy.c clang/test/CodeGen/arm64_vdupq_n_f64.c clang/test/CodeGen/attr-coldhot.c clang/test/CodeGen/builtins-arm-exclusive.c clang/test/CodeGen/builtins-arm.c clang/test/CodeGen/builtins-arm64.c clang/test/CodeGen/noduplicate-cxx11-test.cpp clang/test/CodeGen/pragma-weak.c clang/test/CodeGen/unwind-attr.c clang/test/CodeGenCXX/apple-kext-indirect-virtual-dtor-call.cpp clang/test/CodeGenCXX/apple-kext-linkage.cpp clang/test/CodeGenCXX/apple-kext-no-staticinit-section.cpp clang/test/CodeGenCXX/debug-info-global-ctor-dtor.cpp clang/test/CodeGenCXX/optnone-templates.cpp clang/test/CodeGenCXX/static-init-wasm.cpp clang/test/CodeGenCXX/thunks.cpp clang/test/CodeGenObjC/attr-minsize.m clang/test/CodeGenObjC/gnu-exceptions.m clang/test/CodeGenOpenCL/amdgpu-attrs.cl clang/test/Driver/darwin-iphone-defaults.m clang/test/OpenMP/nvptx_teams_codegen.cpp Index: clang/test/OpenMP/nvptx_teams_codegen.cpp === --- clang/test/OpenMP/nvptx_teams_codegen.cpp +++ clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -1,8 +1,8 @@ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -disable-O0-optnone -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -disable-O0-optnone -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -disable-O0-optnone -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -disable-O0-optnone -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -60,10 +60,10 @@ #endif // CK1 // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown