[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-05-19 Thread Fangrui Song via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
MaskRay marked an inline comment as done.
Closed by commit rGad31a2dcadfc: Change -fsanitize=function to place two words 
before the function entry (authored by MaskRay).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/CodeGen/TargetInfo.h
  clang/test/CodeGen/ubsan-function.cpp
  clang/test/CodeGenCXX/catch-undef-behavior.cpp
  clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
  compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/test/CodeGen/X86/func-sanitizer.ll
  llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll

Index: llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
===
--- llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
+++ llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -mtriple=i686 %s -o - | FileCheck --check-prefixes=CHECK,32 %s
 ; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,64 %s
 
+@_ZTIFvvE = linkonce_odr constant i32 1
+@__llvm_rtti_proxy = private unnamed_addr constant ptr @_ZTIFvvE
+
 ;; -fpatchable-function-entry=0 -fcf-protection=branch
 define void @f0() "patchable-function-entry"="0" {
 ; CHECK-LABEL: f0:
@@ -83,6 +86,25 @@
   ret void
 }
 
+;; Test the interaction with -fsanitize=function.
+; CHECK:  .type sanitize_function,@function
+; CHECK-NEXT: .Ltmp{{.*}}:
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   .long   3238382334
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-sanitize_function
+; CHECK-NEXT: sanitize_function:
+; CHECK-NEXT: .Lfunc_begin{{.*}}:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; 32-NEXT:  endbr32
+; 64-NEXT:  endbr64
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   ret
+define void @sanitize_function(ptr noundef %x) "patchable-function-prefix"="1" "patchable-function-entry"="1" !func_sanitize !1 {
+  ret void
+}
+
 !llvm.module.flags = !{!0}
 
 !0 = !{i32 8, !"cf-protection-branch", i32 1}
+!1 = !{i32 3238382334, ptr @__llvm_rtti_proxy}
Index: llvm/test/CodeGen/X86/func-sanitizer.ll
===
--- llvm/test/CodeGen/X86/func-sanitizer.ll
+++ llvm/test/CodeGen/X86/func-sanitizer.ll
@@ -1,12 +1,12 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
 
-; CHECK: _Z3funv:
-; CHECK: .cfi_startproc
-; CHECK: .long   846595819
-; CHECK: .long   .L__llvm_rtti_proxy-_Z3funv
-; CHECK: .L__llvm_rtti_proxy:
-; CHECK: .quad   i
-; CHECK: .size   .L__llvm_rtti_proxy, 8
+; CHECK:  .type _Z3funv,@function
+; CHECK-NEXT:   .long   3238382334  # 0xc105cafe
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-_Z3funv
+; CHECK-NEXT: _Z3funv:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; CHECK-NEXT:   retq
 
 @i = linkonce_odr constant i32 1
 @__llvm_rtti_proxy = private unnamed_addr constant ptr @i
@@ -15,4 +15,4 @@
   ret void
 }
 
-!0 = !{i32 846595819, ptr @__llvm_rtti_proxy}
+!0 = !{i32 3238382334, ptr @__llvm_rtti_proxy}
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -971,6 +971,21 @@
 CurrentPatchableFunctionEntrySym = CurrentFnBegin;
   }
 
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+assert(MD->getNumOperands() == 2);
+
+auto *PrologueSig = mdconst::extract(MD->getOperand(0));
+auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
+emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+// Use 32 bit since only small code model is supported.
+OutStreamer->emitValue(PCRel, 4u);
+  }
+
   if (isVerbose()) {
 F.printAsOperand(OutStreamer->getCommentOS(),
  /*PrintType=*/false, F.getParent());
@@ -1025,24 +1040,6 @@
   // Emit the prologue data.
   if (F.hasPrologueData())
 emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
-
-  // Emit the function prologue data for the indirect call sanitizer.
-  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
-assert(TM.getTargetTriple().getArch() == Triple::x86 ||
-   TM.getTargetTriple().getArch() == Triple::x86_64);
-assert(MD->getNumOperands() == 2);
-
-auto *PrologueSig = 

[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-05-19 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay marked an inline comment as done.
MaskRay added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.h:205
   getUBSanFunctionSignature(CodeGen::CodeGenModule ) const {
-return nullptr;
+return llvm::ConstantInt::get(CGM.Int32Ty, 0xc105cafe);
   }

peter.smith wrote:
> Is it worth making this target specific? Defaulting to 0xc105cafe for all 
> targets, if that gets allocated in the future on any one target we can change 
> it without having to test against all other targets.
> 
> For example on AArch64 this is is in the decoding space of SME instructions 
> op0 = 0b1 op1 = 0b. This may get allocated in the future. Although 
> admittedly it is likely to be very rare to find a use of a SME instruction at 
> the end of a function to cause it to get misidentified.
> 
> I suspect most targets have an explicit undefined instruction, such as UDF in 
> AArch64 which is 0x where ? can be any value. On Arm there two 
> separate 4-byte encodings for Arm, Thumb of UDF.
> 
Thanks for the review! This is a virtual function, so a target can override as 
appropriate.

Thanks for informing that this is an encoding that AArch64 may allocate in the 
future. Since the signature is placed before the function label and is used to 
protect uninstrumented object files, the signature doesn't need to be out of 
all encoding space. As long as it is unlikely to be the last 2 instructions of 
a previous function, this is going to have a good defensive effect. So I expect 
that A32/A64 may not want to change this as well.

I assume that T32 unlikely uses 0xca 0xca (ldm r2, {r1-r7}) as one of the last 
two instructions of the previous function, so this seems fine as well, but no 
objection if T32 wants to change to a different signature :)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-05-19 Thread Peter Smith via Phabricator via cfe-commits
peter.smith accepted this revision.
peter.smith added a comment.
This revision is now accepted and ready to land.

Apologies for the delay LGTM. I think there is a case for setting up the 
signature to be target specific, but that could in theory be done on demand 
when a target adds a clashing instruction.




Comment at: clang/lib/CodeGen/TargetInfo.h:205
   getUBSanFunctionSignature(CodeGen::CodeGenModule ) const {
-return nullptr;
+return llvm::ConstantInt::get(CGM.Int32Ty, 0xc105cafe);
   }

Is it worth making this target specific? Defaulting to 0xc105cafe for all 
targets, if that gets allocated in the future on any one target we can change 
it without having to test against all other targets.

For example on AArch64 this is is in the decoding space of SME instructions op0 
= 0b1 op1 = 0b. This may get allocated in the future. Although admittedly 
it is likely to be very rare to find a use of a SME instruction at the end of a 
function to cause it to get misidentified.

I suspect most targets have an explicit undefined instruction, such as UDF in 
AArch64 which is 0x where ? can be any value. On Arm there two separate 
4-byte encodings for Arm, Thumb of UDF.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-05-18 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added a comment.

Ping:)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-05-09 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added a comment.

In D148665#4316310 , @peter.smith 
wrote:

> My apologies for not responding. If I've got this right there are 4 related 
> patches:
> D148573  (approved)
> D148785  Use type hashes rather than RTTI 
> D148827  support C
> D148665  (this one)



> My initial impressions is that this makes -fsanitize=function look more like 
> -fsanitize=kcfi which makes it accessible from C and available to more 
> targets. Is there anything that we lose in the process of making these 
> changes? For example I would expect RTTI to have more information available 
> than a type hash, although this might not make any functional difference.
>
> I'll try and look over the next few days and leave some comments, apologies a 
> bit busy at work at the moment so I can't promise anything speedy.

Thanks! `-fsanitize=function` will indeed become more like `-fsanitize=kcfi`.

There is a big difference that `-fsanitize=function` instrumented code has a 
signature check for compatibility with object files not compiled with 
`-fsanitize=function` (and old implementations of `-fsanitize=function` with a 
difference location to place the signature).
-fsanitize=kcfi doesn't have the compatibility guarantee.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-05-03 Thread Peter Smith via Phabricator via cfe-commits
peter.smith added a comment.

My apologies for not responding. If I've got this right there are 4 related 
patches:
D148573  (approved)
D148785  Use type hashes rather than RTTI 
D148827  support C
D148665  (this one)

My initial impressions is that this makes -fsanitize=function look more like 
-fsanitize=kcfi which makes it accessible from C and available to more targets. 
Is there anything that we lose in the process of making these changes? For 
example I would expect RTTI to have more information available than a type 
hash, although this might not make any functional difference.

I'll try and look over the next few days and leave some comments, apologies a 
bit busy at work at the moment so I can't promise anything speedy.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-05-02 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added a comment.

Ping:)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-04-25 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added a comment.

ping :)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-04-19 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay updated this revision to Diff 515166.
MaskRay added a comment.

update test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/CodeGen/TargetInfo.h
  clang/test/CodeGen/ubsan-function.cpp
  clang/test/CodeGenCXX/catch-undef-behavior.cpp
  clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
  compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/test/CodeGen/X86/func-sanitizer.ll
  llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll

Index: llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
===
--- llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
+++ llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -mtriple=i686 %s -o - | FileCheck --check-prefixes=CHECK,32 %s
 ; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,64 %s
 
+@_ZTIFvvE = linkonce_odr constant i32 1
+@__llvm_rtti_proxy = private unnamed_addr constant ptr @_ZTIFvvE
+
 ;; -fpatchable-function-entry=0 -fcf-protection=branch
 define void @f0() "patchable-function-entry"="0" {
 ; CHECK-LABEL: f0:
@@ -83,6 +86,25 @@
   ret void
 }
 
+;; Test the interaction with -fsanitize=function.
+; CHECK:  .type sanitize_function,@function
+; CHECK-NEXT: .Ltmp{{.*}}:
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   .long   3238382334
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-sanitize_function
+; CHECK-NEXT: sanitize_function:
+; CHECK-NEXT: .Lfunc_begin{{.*}}:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; 32-NEXT:  endbr32
+; 64-NEXT:  endbr64
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   ret
+define void @sanitize_function(ptr noundef %x) "patchable-function-prefix"="1" "patchable-function-entry"="1" !func_sanitize !1 {
+  ret void
+}
+
 !llvm.module.flags = !{!0}
 
 !0 = !{i32 8, !"cf-protection-branch", i32 1}
+!1 = !{i32 3238382334, ptr @__llvm_rtti_proxy}
Index: llvm/test/CodeGen/X86/func-sanitizer.ll
===
--- llvm/test/CodeGen/X86/func-sanitizer.ll
+++ llvm/test/CodeGen/X86/func-sanitizer.ll
@@ -1,12 +1,12 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
 
-; CHECK: _Z3funv:
-; CHECK: .cfi_startproc
-; CHECK: .long   846595819
-; CHECK: .long   .L__llvm_rtti_proxy-_Z3funv
-; CHECK: .L__llvm_rtti_proxy:
-; CHECK: .quad   i
-; CHECK: .size   .L__llvm_rtti_proxy, 8
+; CHECK:  .type _Z3funv,@function
+; CHECK-NEXT:   .long   3238382334  # 0xc105cafe
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-_Z3funv
+; CHECK-NEXT: _Z3funv:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; CHECK-NEXT:   retq
 
 @i = linkonce_odr constant i32 1
 @__llvm_rtti_proxy = private unnamed_addr constant ptr @i
@@ -15,4 +15,4 @@
   ret void
 }
 
-!0 = !{i32 846595819, ptr @__llvm_rtti_proxy}
+!0 = !{i32 3238382334, ptr @__llvm_rtti_proxy}
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -970,6 +970,21 @@
 CurrentPatchableFunctionEntrySym = CurrentFnBegin;
   }
 
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+assert(MD->getNumOperands() == 2);
+
+auto *PrologueSig = mdconst::extract(MD->getOperand(0));
+auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
+emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+// Use 32 bit since only small code model is supported.
+OutStreamer->emitValue(PCRel, 4u);
+  }
+
   if (isVerbose()) {
 F.printAsOperand(OutStreamer->getCommentOS(),
  /*PrintType=*/false, F.getParent());
@@ -1024,24 +1039,6 @@
   // Emit the prologue data.
   if (F.hasPrologueData())
 emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
-
-  // Emit the function prologue data for the indirect call sanitizer.
-  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
-assert(TM.getTargetTriple().getArch() == Triple::x86 ||
-   TM.getTargetTriple().getArch() == Triple::x86_64);
-assert(MD->getNumOperands() == 2);
-
-auto *PrologueSig = mdconst::extract(MD->getOperand(0));
-auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
-assert(PrologueSig && FTRTTIProxy);
-emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
-
-const MCExpr *Proxy = 

[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-04-19 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay updated this revision to Diff 515157.
MaskRay edited the summary of this revision.
MaskRay added a comment.

remove an assert to make this feature available to all targets


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/CodeGen/TargetInfo.h
  clang/test/CodeGen/ubsan-function.cpp
  compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/test/CodeGen/X86/func-sanitizer.ll
  llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll

Index: llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
===
--- llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
+++ llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -mtriple=i686 %s -o - | FileCheck --check-prefixes=CHECK,32 %s
 ; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,64 %s
 
+@_ZTIFvvE = linkonce_odr constant i32 1
+@__llvm_rtti_proxy = private unnamed_addr constant ptr @_ZTIFvvE
+
 ;; -fpatchable-function-entry=0 -fcf-protection=branch
 define void @f0() "patchable-function-entry"="0" {
 ; CHECK-LABEL: f0:
@@ -83,6 +86,25 @@
   ret void
 }
 
+;; Test the interaction with -fsanitize=function.
+; CHECK:  .type sanitize_function,@function
+; CHECK-NEXT: .Ltmp{{.*}}:
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   .long   3238382334
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-sanitize_function
+; CHECK-NEXT: sanitize_function:
+; CHECK-NEXT: .Lfunc_begin{{.*}}:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; 32-NEXT:  endbr32
+; 64-NEXT:  endbr64
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   ret
+define void @sanitize_function(ptr noundef %x) "patchable-function-prefix"="1" "patchable-function-entry"="1" !func_sanitize !1 {
+  ret void
+}
+
 !llvm.module.flags = !{!0}
 
 !0 = !{i32 8, !"cf-protection-branch", i32 1}
+!1 = !{i32 3238382334, ptr @__llvm_rtti_proxy}
Index: llvm/test/CodeGen/X86/func-sanitizer.ll
===
--- llvm/test/CodeGen/X86/func-sanitizer.ll
+++ llvm/test/CodeGen/X86/func-sanitizer.ll
@@ -1,12 +1,12 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
 
-; CHECK: _Z3funv:
-; CHECK: .cfi_startproc
-; CHECK: .long   846595819
-; CHECK: .long   .L__llvm_rtti_proxy-_Z3funv
-; CHECK: .L__llvm_rtti_proxy:
-; CHECK: .quad   i
-; CHECK: .size   .L__llvm_rtti_proxy, 8
+; CHECK:  .type _Z3funv,@function
+; CHECK-NEXT:   .long   3238382334  # 0xc105cafe
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-_Z3funv
+; CHECK-NEXT: _Z3funv:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; CHECK-NEXT:   retq
 
 @i = linkonce_odr constant i32 1
 @__llvm_rtti_proxy = private unnamed_addr constant ptr @i
@@ -15,4 +15,4 @@
   ret void
 }
 
-!0 = !{i32 846595819, ptr @__llvm_rtti_proxy}
+!0 = !{i32 3238382334, ptr @__llvm_rtti_proxy}
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -970,6 +970,21 @@
 CurrentPatchableFunctionEntrySym = CurrentFnBegin;
   }
 
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+assert(MD->getNumOperands() == 2);
+
+auto *PrologueSig = mdconst::extract(MD->getOperand(0));
+auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
+emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+// Use 32 bit since only small code model is supported.
+OutStreamer->emitValue(PCRel, 4u);
+  }
+
   if (isVerbose()) {
 F.printAsOperand(OutStreamer->getCommentOS(),
  /*PrintType=*/false, F.getParent());
@@ -1024,24 +1039,6 @@
   // Emit the prologue data.
   if (F.hasPrologueData())
 emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
-
-  // Emit the function prologue data for the indirect call sanitizer.
-  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
-assert(TM.getTargetTriple().getArch() == Triple::x86 ||
-   TM.getTargetTriple().getArch() == Triple::x86_64);
-assert(MD->getNumOperands() == 2);
-
-auto *PrologueSig = mdconst::extract(MD->getOperand(0));
-auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
-assert(PrologueSig && FTRTTIProxy);
-emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
-
-const MCExpr *Proxy = 

[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-04-18 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay updated this revision to Diff 514761.
MaskRay added a comment.

fix clang/test/CodeGen


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148665/new/

https://reviews.llvm.org/D148665

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/test/CodeGen/ubsan-function.cpp
  compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/test/CodeGen/X86/func-sanitizer.ll
  llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll

Index: llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
===
--- llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
+++ llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -mtriple=i686 %s -o - | FileCheck --check-prefixes=CHECK,32 %s
 ; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,64 %s
 
+@_ZTIFvvE = linkonce_odr constant i32 1
+@__llvm_rtti_proxy = private unnamed_addr constant ptr @_ZTIFvvE
+
 ;; -fpatchable-function-entry=0 -fcf-protection=branch
 define void @f0() "patchable-function-entry"="0" {
 ; CHECK-LABEL: f0:
@@ -83,6 +86,25 @@
   ret void
 }
 
+;; Test the interaction with -fsanitize=function.
+; CHECK:  .type sanitize_function,@function
+; CHECK-NEXT: .Ltmp{{.*}}:
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   .long   846595819
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-sanitize_function
+; CHECK-NEXT: sanitize_function:
+; CHECK-NEXT: .Lfunc_begin{{.*}}:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; 32-NEXT:  endbr32
+; 64-NEXT:  endbr64
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   ret
+define void @sanitize_function(ptr noundef %x) "patchable-function-prefix"="1" "patchable-function-entry"="1" !func_sanitize !1 {
+  ret void
+}
+
 !llvm.module.flags = !{!0}
 
 !0 = !{i32 8, !"cf-protection-branch", i32 1}
+!1 = !{i32 846595819, ptr @__llvm_rtti_proxy}
Index: llvm/test/CodeGen/X86/func-sanitizer.ll
===
--- llvm/test/CodeGen/X86/func-sanitizer.ll
+++ llvm/test/CodeGen/X86/func-sanitizer.ll
@@ -1,12 +1,12 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
 
-; CHECK: _Z3funv:
-; CHECK: .cfi_startproc
-; CHECK: .long   846595819
-; CHECK: .long   .L__llvm_rtti_proxy-_Z3funv
-; CHECK: .L__llvm_rtti_proxy:
-; CHECK: .quad   i
-; CHECK: .size   .L__llvm_rtti_proxy, 8
+; CHECK:  .type _Z3funv,@function
+; CHECK-NEXT:   .long   846595819  # 0x327606eb
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-_Z3funv
+; CHECK-NEXT: _Z3funv:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; CHECK-NEXT:   retq
 
 @i = linkonce_odr constant i32 1
 @__llvm_rtti_proxy = private unnamed_addr constant ptr @i
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -970,6 +970,22 @@
 CurrentPatchableFunctionEntrySym = CurrentFnBegin;
   }
 
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+assert(TM.getTargetTriple().isX86());
+assert(MD->getNumOperands() == 2);
+
+auto *PrologueSig = mdconst::extract(MD->getOperand(0));
+auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
+emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+// Use 32 bit since only small code model is supported.
+OutStreamer->emitValue(PCRel, 4u);
+  }
+
   if (isVerbose()) {
 F.printAsOperand(OutStreamer->getCommentOS(),
  /*PrintType=*/false, F.getParent());
@@ -1024,24 +1040,6 @@
   // Emit the prologue data.
   if (F.hasPrologueData())
 emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
-
-  // Emit the function prologue data for the indirect call sanitizer.
-  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
-assert(TM.getTargetTriple().getArch() == Triple::x86 ||
-   TM.getTargetTriple().getArch() == Triple::x86_64);
-assert(MD->getNumOperands() == 2);
-
-auto *PrologueSig = mdconst::extract(MD->getOperand(0));
-auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
-assert(PrologueSig && FTRTTIProxy);
-emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
-
-const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
-const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
-const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
-// Use 32 bit since only small code model is supported.
-

[PATCH] D148665: Change -fsanitize=function to place two words before the function entry

2023-04-18 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay created this revision.
MaskRay added reviewers: dmgreen, lenary, pcc, peter.smith.
Herald added subscribers: Enna1, hiraditya, kristof.beyls.
Herald added a project: All.
MaskRay requested review of this revision.
Herald added projects: clang, Sanitizers, LLVM.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.

The current implementation of -fsanitize=function places two words (the prolog
signature and the RTTI proxy) at the function entry, which makes the feature
incompatible with Intel Indirect Branch Tracking that needs an ENDBR instruction
at the function entry. To allow the combination, move the two words before the
function entry, similar to -fsanitize=kcfi.

The code will also be shared with my pending patch implementing
-fsanitize=function for AArch64 (Branch Target Identification has a similar
requirement).

---

For the removed function in AsmPrinter.cpp, remove an assert: `mdconst::extract`
already asserts non-nullness.

For compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp,
when the function doesn't have prolog/epilog (-O1 and above), after moving the 
two words,
the address of the function equals the address of ret instruction,
so symbolizing the function will additionally get a non-zero column number.
Adjust the test to allow an optional column number.

.long   846595819
.long   .L__llvm_rtti_proxy-_Z1fv
  _Z1fv:   // symbolizing here retrieves the line table entry from the second 
.loc
.file   0 ...
.loc0 1 0
.cfi_startproc
.loc0 2 1 prologue_end
retq


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D148665

Files:
  clang/lib/CodeGen/CGExpr.cpp
  compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/test/CodeGen/X86/func-sanitizer.ll
  llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll

Index: llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
===
--- llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
+++ llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -mtriple=i686 %s -o - | FileCheck --check-prefixes=CHECK,32 %s
 ; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,64 %s
 
+@_ZTIFvvE = linkonce_odr constant i32 1
+@__llvm_rtti_proxy = private unnamed_addr constant ptr @_ZTIFvvE
+
 ;; -fpatchable-function-entry=0 -fcf-protection=branch
 define void @f0() "patchable-function-entry"="0" {
 ; CHECK-LABEL: f0:
@@ -83,6 +86,25 @@
   ret void
 }
 
+;; Test the interaction with -fsanitize=function.
+; CHECK:  .type sanitize_function,@function
+; CHECK-NEXT: .Ltmp{{.*}}:
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   .long   846595819
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-sanitize_function
+; CHECK-NEXT: sanitize_function:
+; CHECK-NEXT: .Lfunc_begin{{.*}}:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; 32-NEXT:  endbr32
+; 64-NEXT:  endbr64
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   ret
+define void @sanitize_function(ptr noundef %x) "patchable-function-prefix"="1" "patchable-function-entry"="1" !func_sanitize !1 {
+  ret void
+}
+
 !llvm.module.flags = !{!0}
 
 !0 = !{i32 8, !"cf-protection-branch", i32 1}
+!1 = !{i32 846595819, ptr @__llvm_rtti_proxy}
Index: llvm/test/CodeGen/X86/func-sanitizer.ll
===
--- llvm/test/CodeGen/X86/func-sanitizer.ll
+++ llvm/test/CodeGen/X86/func-sanitizer.ll
@@ -1,12 +1,12 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
 
-; CHECK: _Z3funv:
-; CHECK: .cfi_startproc
-; CHECK: .long   846595819
-; CHECK: .long   .L__llvm_rtti_proxy-_Z3funv
-; CHECK: .L__llvm_rtti_proxy:
-; CHECK: .quad   i
-; CHECK: .size   .L__llvm_rtti_proxy, 8
+; CHECK:  .type _Z3funv,@function
+; CHECK-NEXT:   .long   846595819  # 0x327606eb
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-_Z3funv
+; CHECK-NEXT: _Z3funv:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; CHECK-NEXT:   retq
 
 @i = linkonce_odr constant i32 1
 @__llvm_rtti_proxy = private unnamed_addr constant ptr @i
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -970,6 +970,22 @@
 CurrentPatchableFunctionEntrySym = CurrentFnBegin;
   }
 
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+assert(TM.getTargetTriple().isX86());
+assert(MD->getNumOperands() == 2);
+
+auto *PrologueSig = mdconst::extract(MD->getOperand(0));
+auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1));
+emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+const MCExpr *FnExp =