[PATCH] D38210: [ubsan] Port the function sanitizer to C

2017-10-03 Thread Vedant Kumar via Phabricator via cfe-commits
vsk planned changes to this revision.
vsk added a comment.

In https://reviews.llvm.org/D38210#887635, @pcc wrote:

> Wouldn't we get false positives if there is an indirect call in C++ code that 
> calls into C code (or vice versa)?


Ah, right, I'm surprised I didn't hit that while testing.

> I think I'd prefer it if we came up with a precise encoding of function types 
> that was independent of RTTI, and use it in all languages. One possibility 
> would be to represent each function type with an object of size 1 whose name 
> contains the mangled function type, and use its address as the identity of 
> the function type.

That makes sense. Like the RTTI object it could be made linkonce_odr.


https://reviews.llvm.org/D38210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38210: [ubsan] Port the function sanitizer to C

2017-10-03 Thread Peter Collingbourne via Phabricator via cfe-commits
pcc added a comment.

Wouldn't we get false positives if there is an indirect call in C++ code that 
calls into C code (or vice versa)?

I think I'd prefer it if we came up with a precise encoding of function types 
that was independent of RTTI, and use it in all languages. One possibility 
would be to represent each function type with an object of size 1 whose name 
contains the mangled function type, and use its address as the identity of the 
function type.


https://reviews.llvm.org/D38210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38210: [ubsan] Port the function sanitizer to C

2017-10-03 Thread Vedant Kumar via Phabricator via cfe-commits
vsk added a reviewer: arphaman.
vsk added a comment.

Ping.


https://reviews.llvm.org/D38210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38210: [ubsan] Port the function sanitizer to C

2017-09-25 Thread Vedant Kumar via Phabricator via cfe-commits
vsk updated this revision to Diff 116453.
vsk added a comment.

- Remove some noisy changes.


https://reviews.llvm.org/D38210

Files:
  docs/UndefinedBehaviorSanitizer.rst
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/sanitize-function-calls.c

Index: test/CodeGen/sanitize-function-calls.c
===
--- /dev/null
+++ test/CodeGen/sanitize-function-calls.c
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -w -triple i386-linux-gnu -fsanitize=function -emit-llvm -o - %s | FileCheck %s --check-prefixes=X32
+// RUN: %clang_cc1 -w -triple x86_64-linux-gnu -fsanitize=function -emit-llvm -o - %s | FileCheck %s --check-prefixes=X64
+
+struct S {};
+
+// X32: [[no_proto_ti:@.*]] = private constant i8* inttoptr (i32 4 to i8*)
+// X64: [[no_proto_ti:@.*]] = private constant i8* inttoptr (i64 4 to i8*)
+
+// X32: prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[no_proto_ti]] to i32), i32 ptrtoint (void ()* @no_proto to i32)) }>
+// X64: prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** [[no_proto_ti]] to i64), i64 ptrtoint (void ()* @no_proto to i64)) to i32) }>
+void no_proto() {}
+
+void proto(void) {}
+
+typedef struct S (*vfunc0)(void);
+typedef void (*vfunc1)(void);
+typedef char (*vfunc2)(void);
+typedef short (*vfunc3)(void);
+typedef int (*vfunc4)(void);
+typedef long long (*vfunc5)(void);
+typedef float (*vfunc6)(void);
+typedef double (*vfunc7)(void);
+typedef void (*vfunc8)(int, int, int, int, int, int, int, int, int, int, int);
+
+// X64-LABEL: @call_proto
+void call_proto(void) {
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, null, !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc0 f0 = &proto;
+  f0();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 4 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc1 f1 = &proto;
+  f1();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 16 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc2 f2 = &proto;
+  f2();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 20 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc3 f3 = &proto;
+  f3();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 24 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc4 f4 = &proto;
+  f4();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 28 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc5 f5 = &proto;
+  f5();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 8 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc6 f6 = &proto;
+  f6();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 12 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc7 f7 = &proto;
+  f7();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 3681400516 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc8 f8 = &proto;
+  f8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+}
+
+// X64-LABEL: @call_no_proto
+void call_no_proto(void) {
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, null, !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc0 f0 = &no_proto;
+  f0();
+}
+
+// X64-LABEL: @main
+int main() {
+  call_proto();
+  call_no_proto();
+  return 0;
+}
Index: lib/CodeGen/CodeGenModule.h
===
--- lib/CodeGen/CodeGenModule.h
+++ lib/CodeGen/CodeGenModule.h
@@ -745,6 +745,9 @@
   /// Get the address of the RTTI descriptor for the given type.
   llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false);
 
+  /// Get the type descriptor for a function for use with UBSan.
+  llvm::Constant *GetUBSanFunctionTypeDescriptor(QualType Ty);
+
   /// Get the address of a uuid descriptor .
   ConstantAddress GetAddrOfUuidDescriptor(const CXXUuidofExpr* E);
 
Index: lib/CodeGen/CodeGenModule.cpp
===
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -4495,6 +4495,62 @@
   return getCXXABI().getAddrOfRTTIDescriptor(Ty);
 }
 
+llvm::Constant *CodeGenModule::GetUBSanFunctionTypeDescriptor(QualType Ty) {
+  if (getLangOpts().CPlusPlus)
+    return GetAddrOfRTTIDescriptor(Ty, /*ForEH=*/true);
+
+  // Bits:
+  //  0: unused
+  //  1: unused
+  //  2-4  : encode(returnType)
+  //  5-7  : encode(param1)
+  //  ...  : encode(paramK)
+  //  29-31: encode(param9)
+
+  auto encode = [this](QualType EncodeTy) {
+    // Encode one of: ?, void, f32, f64, i8, i16, i32, i64 (in 3 bits)
+    const auto *T = EncodeTy.getTypePtr();
+    if (T->isVoidType())
+      return 1;
+
+    uint64_t Size = getContext().getTypeSize(EncodeTy);
+    if (T->hasFloatingRepresentation()) {
+      switch (Size) {
+      case 32:
+

[PATCH] D38210: [ubsan] Port the function sanitizer to C

2017-09-25 Thread Vedant Kumar via Phabricator via cfe-commits
vsk created this revision.

The function sanitizer relies on RTTI to check callee types, but this
scheme doesn't work well in languages without the ODR.

This patch introduces a simple, best-effort function type encoding
which can be used when RTTI isn't available. In this scheme, function
types are encoded within 32 bits. The return type and all parameter
types are recorded using a 3-bit encoding. Zero is a special value in
the 3-bit encoding which means "there is either no type here OR any type
would be permissible here".

This scheme allows false negatives, but not false positives. It's simple
and does not require any changes to the instrumentation.

Testing: I've found some minor issues with the new check, and no FPs.

https://trac.ffmpeg.org/ticket/6685
https://github.com/openssl/openssl/issues/4413


https://reviews.llvm.org/D38210

Files:
  docs/UndefinedBehaviorSanitizer.rst
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/sanitize-function-calls.c

Index: test/CodeGen/sanitize-function-calls.c
===
--- /dev/null
+++ test/CodeGen/sanitize-function-calls.c
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -w -triple i386-linux-gnu -fsanitize=function -emit-llvm -o - %s | FileCheck %s --check-prefixes=X32
+// RUN: %clang_cc1 -w -triple x86_64-linux-gnu -fsanitize=function -emit-llvm -o - %s | FileCheck %s --check-prefixes=X64
+
+struct S {};
+
+// X32: [[no_proto_ti:@.*]] = private constant i8* inttoptr (i32 4 to i8*)
+// X64: [[no_proto_ti:@.*]] = private constant i8* inttoptr (i64 4 to i8*)
+
+// X32: prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[no_proto_ti]] to i32), i32 ptrtoint (void ()* @no_proto to i32)) }>
+// X64: prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** @0 to i64), i64 ptrtoint (void ()* @no_proto to i64)) to i32) }>
+void no_proto() {}
+
+void proto(void) {}
+
+typedef struct S (*vfunc0)(void);
+typedef void (*vfunc1)(void);
+typedef char (*vfunc2)(void);
+typedef short (*vfunc3)(void);
+typedef int (*vfunc4)(void);
+typedef long long (*vfunc5)(void);
+typedef float (*vfunc6)(void);
+typedef double (*vfunc7)(void);
+typedef void (*vfunc8)(int, int, int, int, int, int, int, int, int, int, int);
+
+// X64-LABEL: @call_proto
+void call_proto(void) {
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, null, !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc0 f0 = &proto;
+  f0();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 4 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc1 f1 = &proto;
+  f1();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 16 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc2 f2 = &proto;
+  f2();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 20 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc3 f3 = &proto;
+  f3();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 24 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc4 f4 = &proto;
+  f4();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 28 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc5 f5 = &proto;
+  f5();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 8 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc6 f6 = &proto;
+  f6();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 12 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc7 f7 = &proto;
+  f7();
+
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, inttoptr (i64 3681400516 to i8*), !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc8 f8 = &proto;
+  f8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+}
+
+// X64-LABEL: @call_no_proto
+void call_no_proto(void) {
+  // X64: [[ICMP:%.*]] = icmp eq i8* {{.*}}, null, !nosanitize
+  // X64-NEXT: br i1 [[ICMP]], {{.*}} !nosanitize
+  vfunc0 f0 = &no_proto;
+  f0();
+}
+
+// X64-LABEL: @main
+int main() {
+  call_proto();
+  call_no_proto();
+  return 0;
+}
Index: lib/CodeGen/CodeGenModule.h
===
--- lib/CodeGen/CodeGenModule.h
+++ lib/CodeGen/CodeGenModule.h
@@ -745,6 +745,9 @@
   /// Get the address of the RTTI descriptor for the given type.
   llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false);
 
+  /// Get the type descriptor for a function for use with UBSan.
+  llvm::Constant *GetUBSanFunctionTypeDescriptor(QualType Ty);
+
   /// Get the address of a uuid descriptor .
   ConstantAddress GetAddrOfUuidDescriptor(const CXXUuidofExpr* E);
 
Index: lib/CodeGen/CodeGenModule.cpp
===
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -4495,6 +4495,62 @@
   return