[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-10 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay closed 
https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-09 Thread Fangrui Song via cfe-commits

MaskRay wrote:

Add the test to `clang/test/CodeGen/catch-undef-behavior.c`  
ca810073b3e4cef8ed58c03dcc724771f8f8615b
```
+  /// Casting to void * or char * drops the alignment requirement.
+  memcpy((void *)p, (char *)q, sz);
```

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-09 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/67766

>From ca810073b3e4cef8ed58c03dcc724771f8f8615b Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 28 Sep 2023 15:22:38 -0700
Subject: [PATCH] -fsanitize=alignment: check memcpy/memmove arguments

The -fsanitize=alignment implementation follows the model that we allow
forming unaligned pointers but disallow accessing unaligned pointers.
See [RFC: Enforcing pointer type alignment in 
Clang](https://lists.llvm.org/pipermail/llvm-dev/2016-January/094012.html)
for detail.

memcpy is a memory access and we require an `int *` argument to be aligned.
Similar to https://reviews.llvm.org/D9673 , emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
```
// Check the alignment of a but ignore the alignment of b
void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); }
```

For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
`TCK_ReferenceBinding` check instead.
```
// The alignment check of a will be optimized out.
void unaligned_load(int , void *b) { memcpy(, b, sizeof(a)); }
```

The diagnostic message looks like
```
runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *'
```

We could use a better message for memcpy, but we don't do it for now as it would
require a new check name like misaligned-pointer-use, which is probably not
necessary. *RFC: Enforcing pointer type alignment in Clang* is not well 
documented,
but this patch does not intend to change the that.

Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.
---
 clang/include/clang/Basic/Sanitizers.h|  2 +
 clang/lib/CodeGen/CGBuiltin.cpp   | 39 +++---
 clang/test/CodeGen/catch-undef-behavior.c | 77 ++-
 .../ubsan/TestCases/TypeCheck/misaligned.cpp  | 23 ++
 4 files changed, 127 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/Sanitizers.h 
b/clang/include/clang/Basic/Sanitizers.h
index 8fdaf2e4ba8ab18..c890242269b3348 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -170,6 +170,8 @@ struct SanitizerSet {
 Mask = Value ? (Mask | K) : (Mask & ~K);
   }
 
+  void set(SanitizerMask K) { Mask = K; }
+
   /// Disable the sanitizers specified in \p K.
   void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3f68aa2c953c74b..8cb7943df9a7822 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2730,6 +2730,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 }
   }
 
+  // Check NonnullAttribute/NullabilityArg and Alignment.
+  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+  unsigned ParmNum) {
+Value *Val = A.getPointer();
+EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), 
FD,
+ParmNum);
+
+if (SanOpts.has(SanitizerKind::Alignment)) {
+  SanitizerSet SkippedChecks;
+  SkippedChecks.set(SanitizerKind::All);
+  SkippedChecks.clear(SanitizerKind::Alignment);
+  SourceLocation Loc = Arg->getExprLoc();
+  // Strip an implicit cast.
+  if (auto *CE = dyn_cast(Arg))
+if (CE->getCastKind() == CK_BitCast)
+  Arg = CE->getSubExpr();
+  EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
+SkippedChecks);
+}
+  };
+
   switch (BuiltinIDIfNoAsmLabel) {
   default: break;
   case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -3720,10 +3741,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
 if (BuiltinID == Builtin::BImempcpy ||
 BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3738,10 +3757,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 uint64_t Size =
 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 

[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-09 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay edited 
https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-09 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay edited 
https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-09 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/67766

>From 24d675673844f22e8aef8dc183874696216abb1d Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 28 Sep 2023 15:22:38 -0700
Subject: [PATCH 1/2] -fsanitize=alignment: check memcpy/memmove arguments

Similar to https://reviews.llvm.org/D9673, emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
```
// Check a
void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); }
```

For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
`TCK_ReferenceBinding` check instead.
```
// The check of a will be optimized out.
void unaligned_load(int , void *b) { memcpy(, b, sizeof(a)); }
```

```
runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *'
```

Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.
---
 clang/include/clang/Basic/Sanitizers.h|  2 +
 clang/lib/CodeGen/CGBuiltin.cpp   | 32 +
 clang/test/CodeGen/catch-undef-behavior.c | 68 ++-
 .../ubsan/TestCases/TypeCheck/misaligned.cpp  | 23 +++
 4 files changed, 111 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/Sanitizers.h 
b/clang/include/clang/Basic/Sanitizers.h
index 8fdaf2e4ba8ab18..c890242269b3348 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -170,6 +170,8 @@ struct SanitizerSet {
 Mask = Value ? (Mask | K) : (Mask & ~K);
   }
 
+  void set(SanitizerMask K) { Mask = K; }
+
   /// Disable the sanitizers specified in \p K.
   void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3f68aa2c953c74b..b4b0d5cb670f7c1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2730,6 +2730,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 }
   }
 
+  // Check NonnullAttribute/NullabilityArg and Alignment.
+  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+  unsigned ParmNum) {
+Value *Val = A.getPointer();
+EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), 
FD,
+ParmNum);
+
+SanitizerSet SkippedChecks;
+SkippedChecks.set(SanitizerKind::All);
+SkippedChecks.clear(SanitizerKind::Alignment);
+EmitTypeCheck(Kind, Arg->getExprLoc(), Val, Arg->getType(),
+  A.getAlignment(), SkippedChecks);
+  };
+
   switch (BuiltinIDIfNoAsmLabel) {
   default: break;
   case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -3720,10 +3734,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
 if (BuiltinID == Builtin::BImempcpy ||
 BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3738,10 +3750,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 uint64_t Size =
 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpyInline(Dest, Src, Size);
 return RValue::get(nullptr);
   }
@@ -3798,10 +3808,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+

[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-08 Thread Fangrui Song via cfe-commits

MaskRay wrote:

> @zygoloid Thanks for the explanation! I wasn't aware this fell under 
> unspecified behavior. It's weird that alignment information can survive a 
> `void *` cast, but it does make some sense.

Yes, `(void *)x` decreases the alignment requirement to 1 byte like `(char *)x`.

> What seems worrying here is that apparently GCC and Clang do not agree on 
> semantics in this case 
> ([godbolt.org/z/1r9df5a4a](https://godbolt.org/z/1r9df5a4a)). GCC does not 
> assume that the pointers are aligned. The perils of unspecified behavior

Yes...

I am out of town and will merge this PR later.

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-02 Thread Nikita Popov via cfe-commits

nikic wrote:

@zygoloid Thanks for the explanation! I wasn't aware this fell under 
unspecified behavior. It's weird that alignment information can survive a `void 
*` cast, but it does make some sense.

What seems worrying here is that apparently GCC and Clang do not agree on 
semantics in this case (https://godbolt.org/z/1r9df5a4a). GCC does not assume 
that the pointers are aligned. The perils of unspecified behavior

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-02 Thread Fangrui Song via cfe-commits

MaskRay wrote:

> > @zygoloid Is reusing the message for regular stores clear (current 
> > behavior) enough?
> > ```
> > // CHECK-MEMCPY-STORE: misaligned.cpp:[[#@LINE+4]]{{(:12)?}}: runtime 
> > error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *', 
> > which requires 4 byte alignment
> > ```
> 
> I predict we'll get a bug report saying "I didn't do a store, I used 
> `memcpy`, which is specified as doing a byte-by-byte copy". But given that 
> improving this would require adding a new entry point into the runtime 
> library, and we don't know how common or rare this situation will be, I think 
> it's probably OK to wait until someone complains.

Thanks. Let's wait until someone complains :)

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-10-01 Thread Richard Smith via cfe-commits

zygoloid wrote:

> @zygoloid Is reusing the message for regular stores clear (current behavior) 
> enough?
> 
> ```
> // CHECK-MEMCPY-STORE: misaligned.cpp:[[#@LINE+4]]{{(:12)?}}: runtime error: 
> store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *', which 
> requires 4 byte alignment
> ```

I predict we'll get a bug report saying "I didn't do a store, I used `memcpy`, 
which is specified as doing a byte-by-byte copy". But given that improving this 
would require adding a new entry point into the runtime library, and we don't 
know how common or rare this situation will be, I think it's probably OK to 
wait until someone complains.

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-30 Thread Fangrui Song via cfe-commits

MaskRay wrote:

Thanks for the comment.

> I think the choice we're making here is probably worth it, though we should 
> probably document it better. I think you can remove the alignment assumption 
> by explicitly casting the operands to char* before passing them to memcpy; if 
> you can't, I'd be more worried that we're doing something problematic here. 

Yes. The correct implementation correctly drops the alignment check on the dst 
parameter for `memcpy((char *)a, b, sz);`

> Also, it'd seem like a good idea to make the sanitizer message as clear as 
> possible for this case, because Clang's behavior here is surprising.

@zygoloid 
Is reusing the message for regular stores clear (current behavior) enough?
```
// CHECK-MEMCPY-STORE: misaligned.cpp:[[#@LINE+4]]{{(:12)?}}: runtime error: 
store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *', which 
requires 4 byte alignment
```


https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Richard Smith via cfe-commits

zygoloid wrote:

> Uh, why are we allowed to assume that memcpy pointer arguments are aligned? 
> This looks like a miscompile to me.

This is definitely a bit weird, but...

> A plain `int *` pointer is not required to be aligned, and memcpy works on 
> `void *` pointers, so I'm not sure where an alignment requirement would 
> appear from.

... a plain `int *` pointer *is* sort of required to be aligned. Per 
https://eel.is/c++draft/expr.static.cast#14.sentence-2 (and similar wording 
exists in C), you get an unspecified pointer value if you cast an unaligned 
pointer to `int *`, which is allowed to be an invalid pointer value, which we 
are then permitted to give whatever semantics we like, such as disallowing it 
being passed to `memcpy`.

Clang generally chooses to define the semantics of misaligned pointer values as 
doing the "obvious" thing (@rjmccall circulated an RFC on this a few years 
ago). In short: we allow them to be formed, and we allow pointer arithmetic on 
them, but don't allow them to be used to actually access misaligned memory. But 
`memcpy` is a memory access, so if you directly pass an `int *` to it, I think 
we're following our rules if we require it to be aligned. And it seems like a 
valuable thing to assume: in the vast majority of cases, it is reasonable to 
assume `T`'s alignment when a `T*` is passed to `memcpy`.

I think the choice we're making here is probably worth it, though we should 
probably document it better. I think you can remove the alignment assumption by 
explicitly casting the operands to `char*` before passing them to `memcpy`; if 
you can't, I'd be more worried that we're doing something problematic here. 
Also, it'd seem like a good idea to make the sanitizer message as clear as 
possible for this case, because Clang's behavior here is surprising.

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Vitaly Buka via cfe-commits

https://github.com/vitalybuka approved this pull request.


https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Vitaly Buka via cfe-commits

vitalybuka wrote:

LGTM, but you probably want @rjmccall or @zygoloid review.

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Nikita Popov via cfe-commits

nikic wrote:

Uh, why are we allowed to assume that memcpy pointer arguments are aligned? 
This looks like a miscompile to me.

A plain `int *` pointer is not required to be aligned, and memcpy works on 
`void *` pointers, so I'm not sure where an alignment requirement would appear 
from.

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay edited 
https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Fangrui Song via cfe-commits


@@ -47,6 +50,16 @@ int main(int, char **argv) {
 return *p && 0;
 // CHECK-STACK-LOAD: #0 {{.*}}main{{.*}}misaligned.cpp
 
+  case 'L': {
+int x;
+// CHECK-MEMCPY-LOAD: misaligned.cpp:[[#@LINE+4]]{{(:16)?}}: runtime 
error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'const void *', 
which requires 4 byte alignment

MaskRay wrote:

Yes, the issue was mentioned in the description
```
The error message refers to void * instead of int *, which is not perfect
but should be acceptable.

runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'void *'
```

Anyhow, `BI__builtin_HEXAGON_V6_vmaskedstoreq` strips an implicit cast. I added 
a similar mechanism in 9b705127b17513ee17e76fb903e85500c832d8aa

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/67766

>From 993c23c8cce72e3e1b4ad0924d96c9eac1955d85 Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 28 Sep 2023 15:22:38 -0700
Subject: [PATCH 1/2] -fsanitize=alignment: check memcpy/memmove arguments

Similar to https://reviews.llvm.org/D9673, emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
```
// Check a
void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); }
```

For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
`TCK_ReferenceBinding` check instead.
```
// The check of a will be optimized out.
void unaligned_load(int , void *b) { memcpy(, b, sizeof(a)); }
```

```
runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *'
```

Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.
---
 clang/include/clang/Basic/Sanitizers.h|  2 +
 clang/lib/CodeGen/CGBuiltin.cpp   | 32 +
 clang/test/CodeGen/catch-undef-behavior.c | 68 ++-
 .../ubsan/TestCases/TypeCheck/misaligned.cpp  | 23 +++
 4 files changed, 111 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/Sanitizers.h 
b/clang/include/clang/Basic/Sanitizers.h
index 4659e45c7883419..16d241cbf809bbe 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -170,6 +170,8 @@ struct SanitizerSet {
 Mask = Value ? (Mask | K) : (Mask & ~K);
   }
 
+  void set(SanitizerMask K) { Mask = K; }
+
   /// Disable the sanitizers specified in \p K.
   void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3f68aa2c953c74b..b4b0d5cb670f7c1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2730,6 +2730,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 }
   }
 
+  // Check NonnullAttribute/NullabilityArg and Alignment.
+  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+  unsigned ParmNum) {
+Value *Val = A.getPointer();
+EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), 
FD,
+ParmNum);
+
+SanitizerSet SkippedChecks;
+SkippedChecks.set(SanitizerKind::All);
+SkippedChecks.clear(SanitizerKind::Alignment);
+EmitTypeCheck(Kind, Arg->getExprLoc(), Val, Arg->getType(),
+  A.getAlignment(), SkippedChecks);
+  };
+
   switch (BuiltinIDIfNoAsmLabel) {
   default: break;
   case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -3720,10 +3734,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
 if (BuiltinID == Builtin::BImempcpy ||
 BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3738,10 +3750,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 uint64_t Size =
 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpyInline(Dest, Src, Size);
 return RValue::get(nullptr);
   }
@@ -3798,10 +3808,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+

[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/67766

>From 993c23c8cce72e3e1b4ad0924d96c9eac1955d85 Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 28 Sep 2023 15:22:38 -0700
Subject: [PATCH 1/2] -fsanitize=alignment: check memcpy/memmove arguments

Similar to https://reviews.llvm.org/D9673, emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
```
// Check a
void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); }
```

For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
`TCK_ReferenceBinding` check instead.
```
// The check of a will be optimized out.
void unaligned_load(int , void *b) { memcpy(, b, sizeof(a)); }
```

```
runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *'
```

Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.
---
 clang/include/clang/Basic/Sanitizers.h|  2 +
 clang/lib/CodeGen/CGBuiltin.cpp   | 32 +
 clang/test/CodeGen/catch-undef-behavior.c | 68 ++-
 .../ubsan/TestCases/TypeCheck/misaligned.cpp  | 23 +++
 4 files changed, 111 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/Sanitizers.h 
b/clang/include/clang/Basic/Sanitizers.h
index 4659e45c7883419..16d241cbf809bbe 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -170,6 +170,8 @@ struct SanitizerSet {
 Mask = Value ? (Mask | K) : (Mask & ~K);
   }
 
+  void set(SanitizerMask K) { Mask = K; }
+
   /// Disable the sanitizers specified in \p K.
   void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3f68aa2c953c74b..b4b0d5cb670f7c1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2730,6 +2730,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 }
   }
 
+  // Check NonnullAttribute/NullabilityArg and Alignment.
+  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+  unsigned ParmNum) {
+Value *Val = A.getPointer();
+EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), 
FD,
+ParmNum);
+
+SanitizerSet SkippedChecks;
+SkippedChecks.set(SanitizerKind::All);
+SkippedChecks.clear(SanitizerKind::Alignment);
+EmitTypeCheck(Kind, Arg->getExprLoc(), Val, Arg->getType(),
+  A.getAlignment(), SkippedChecks);
+  };
+
   switch (BuiltinIDIfNoAsmLabel) {
   default: break;
   case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -3720,10 +3734,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
 if (BuiltinID == Builtin::BImempcpy ||
 BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3738,10 +3750,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 uint64_t Size =
 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpyInline(Dest, Src, Size);
 return RValue::get(nullptr);
   }
@@ -3798,10 +3808,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+

[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Vitaly Buka via cfe-commits


@@ -47,6 +50,16 @@ int main(int, char **argv) {
 return *p && 0;
 // CHECK-STACK-LOAD: #0 {{.*}}main{{.*}}misaligned.cpp
 
+  case 'L': {
+int x;
+// CHECK-MEMCPY-LOAD: misaligned.cpp:[[#@LINE+4]]{{(:16)?}}: runtime 
error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'const void *', 
which requires 4 byte alignment

vitalybuka wrote:

Maybe not wrong, but unhelpful.
Issue is that 'int*' was misaligned, not that we pass it into void*.



https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Vitaly Buka via cfe-commits


@@ -47,6 +50,16 @@ int main(int, char **argv) {
 return *p && 0;
 // CHECK-STACK-LOAD: #0 {{.*}}main{{.*}}misaligned.cpp
 
+  case 'L': {
+int x;
+// CHECK-MEMCPY-LOAD: misaligned.cpp:[[#@LINE+4]]{{(:16)?}}: runtime 
error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'const void *', 
which requires 4 byte alignment

vitalybuka wrote:

'const void *' is wrong here :(

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Vitaly Buka via cfe-commits

https://github.com/vitalybuka deleted 
https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Vitaly Buka via cfe-commits

https://github.com/vitalybuka deleted 
https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Vitaly Buka via cfe-commits


@@ -47,6 +50,16 @@ int main(int, char **argv) {
 return *p && 0;
 // CHECK-STACK-LOAD: #0 {{.*}}main{{.*}}misaligned.cpp
 
+  case 'L': {
+int x;
+// CHECK-MEMCPY-LOAD: misaligned.cpp:[[#@LINE+4]]{{(:16)?}}: runtime 
error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'const void *', 
which requires 4 byte alignment

vitalybuka wrote:

Never mind, I see.

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Vitaly Buka via cfe-commits


@@ -47,6 +50,16 @@ int main(int, char **argv) {
 return *p && 0;
 // CHECK-STACK-LOAD: #0 {{.*}}main{{.*}}misaligned.cpp
 
+  case 'L': {
+int x;
+// CHECK-MEMCPY-LOAD: misaligned.cpp:[[#@LINE+4]]{{(:16)?}}: runtime 
error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'const void *', 
which requires 4 byte alignment

vitalybuka wrote:

Actually in this case I'd rather ask, why compiler believe that "p" suppose to 
be aligned?

https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay edited 
https://github.com/llvm/llvm-project/pull/67766
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-29 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/67766

>From b1201f3fd4d758b739b04e3d5e474549917added Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 28 Sep 2023 15:22:38 -0700
Subject: [PATCH] -fsanitize=alignment: check memcpy/memmove arguments

Similar to https://reviews.llvm.org/D9673, emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
```
// Check a
void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); }
```

For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
`TCK_ReferenceBinding` check instead.
```
// The check of a will be optimized out.
void unaligned_load(int , void *b) { memcpy(, b, sizeof(a)); }
```

The error message refers to `void *` instead of `int *`, which is not perfect
but should be acceptable.
```
runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'void *'
```

Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.
---
 clang/include/clang/Basic/Sanitizers.h|  2 +
 clang/lib/CodeGen/CGBuiltin.cpp   | 32 +
 clang/test/CodeGen/catch-undef-behavior.c | 68 ++-
 .../ubsan/TestCases/TypeCheck/misaligned.cpp  | 23 +++
 4 files changed, 111 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/Sanitizers.h 
b/clang/include/clang/Basic/Sanitizers.h
index 4659e45c7883419..16d241cbf809bbe 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -170,6 +170,8 @@ struct SanitizerSet {
 Mask = Value ? (Mask | K) : (Mask & ~K);
   }
 
+  void set(SanitizerMask K) { Mask = K; }
+
   /// Disable the sanitizers specified in \p K.
   void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b0fd38408806566..ec1cdde8a322a18 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2730,6 +2730,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 }
   }
 
+  // Check NonnullAttribute/NullabilityArg and Alignment.
+  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+  unsigned ParmNum) {
+Value *Val = A.getPointer();
+EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), 
FD,
+ParmNum);
+
+SanitizerSet SkippedChecks;
+SkippedChecks.set(SanitizerKind::All);
+SkippedChecks.clear(SanitizerKind::Alignment);
+EmitTypeCheck(Kind, Arg->getExprLoc(), Val, Arg->getType(),
+  A.getAlignment(), SkippedChecks);
+  };
+
   switch (BuiltinIDIfNoAsmLabel) {
   default: break;
   case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -3720,10 +3734,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
 if (BuiltinID == Builtin::BImempcpy ||
 BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3738,10 +3750,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 uint64_t Size =
 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpyInline(Dest, Src, Size);
 return RValue::get(nullptr);
   }
@@ -3798,10 +3808,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), 

[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-28 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang


Changes

Similar to https://reviews.llvm.org/D9673, emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
```
// Check a
void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); }
```

For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
`TCK_ReferenceBinding` check instead.
```
// The check of a will be optimized out.
void unaligned_load(int a, void *b) { memcpy(a, b, sizeof(a)); }
```

Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.


---
Full diff: https://github.com/llvm/llvm-project/pull/67766.diff


3 Files Affected:

- (modified) clang/include/clang/Basic/Sanitizers.h (+2) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+19-12) 
- (modified) clang/test/CodeGen/catch-undef-behavior.c (+66-2) 


``diff
diff --git a/clang/include/clang/Basic/Sanitizers.h 
b/clang/include/clang/Basic/Sanitizers.h
index 4659e45c7883419..16d241cbf809bbe 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -170,6 +170,8 @@ struct SanitizerSet {
 Mask = Value ? (Mask | K) : (Mask & ~K);
   }
 
+  void set(SanitizerMask K) { Mask = K; }
+
   /// Disable the sanitizers specified in \p K.
   void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b0fd38408806566..2102d0aaff9d620 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2730,6 +2730,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 }
   }
 
+  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+  unsigned ParmNum) {
+Value *Val = A.getPointer();
+EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), 
FD,
+ParmNum);
+
+SanitizerSet SkippedChecks;
+SkippedChecks.set(SanitizerKind::All);
+SkippedChecks.clear(SanitizerKind::Alignment);
+EmitTypeCheck(Kind, Arg->getExprLoc(), Val, Arg->getType(),
+  A.getAlignment(), SkippedChecks);
+  };
+
   switch (BuiltinIDIfNoAsmLabel) {
   default: break;
   case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -3720,10 +3733,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
 if (BuiltinID == Builtin::BImempcpy ||
 BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3738,10 +3749,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 uint64_t Size =
 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpyInline(Dest, Src, Size);
 return RValue::get(nullptr);
   }
@@ -3798,10 +3807,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemMove(Dest, Src, SizeVal, false);
 return RValue::get(Dest.getPointer());
   }
diff --git a/clang/test/CodeGen/catch-undef-behavior.c 
b/clang/test/CodeGen/catch-undef-behavior.c
index ca0df0f002f8912..3956a475f319ea9 100644
--- a/clang/test/CodeGen/catch-undef-behavior.c
+++ 

[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

2023-09-28 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay created 
https://github.com/llvm/llvm-project/pull/67766

Similar to https://reviews.llvm.org/D9673, emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
```
// Check a
void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); }
```

For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
`TCK_ReferenceBinding` check instead.
```
// The check of a will be optimized out.
void unaligned_load(int , void *b) { memcpy(, b, sizeof(a)); }
```

Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.


>From 17d767e4fb56ed02c7031e21ee9a04790c7bc801 Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 28 Sep 2023 15:22:38 -0700
Subject: [PATCH] -fsanitize=alignment: check memcpy/memmove arguments

Similar to https://reviews.llvm.org/D9673, emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
```
// Check a
void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); }
```

For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
`TCK_ReferenceBinding` check instead.
```
// The check of a will be optimized out.
void unaligned_load(int , void *b) { memcpy(, b, sizeof(a)); }
```

Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.
---
 clang/include/clang/Basic/Sanitizers.h|  2 +
 clang/lib/CodeGen/CGBuiltin.cpp   | 31 +++
 clang/test/CodeGen/catch-undef-behavior.c | 68 ++-
 3 files changed, 87 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/Sanitizers.h 
b/clang/include/clang/Basic/Sanitizers.h
index 4659e45c7883419..16d241cbf809bbe 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -170,6 +170,8 @@ struct SanitizerSet {
 Mask = Value ? (Mask | K) : (Mask & ~K);
   }
 
+  void set(SanitizerMask K) { Mask = K; }
+
   /// Disable the sanitizers specified in \p K.
   void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b0fd38408806566..2102d0aaff9d620 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2730,6 +2730,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 }
   }
 
+  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+  unsigned ParmNum) {
+Value *Val = A.getPointer();
+EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), 
FD,
+ParmNum);
+
+SanitizerSet SkippedChecks;
+SkippedChecks.set(SanitizerKind::All);
+SkippedChecks.clear(SanitizerKind::Alignment);
+EmitTypeCheck(Kind, Arg->getExprLoc(), Val, Arg->getType(),
+  A.getAlignment(), SkippedChecks);
+  };
+
   switch (BuiltinIDIfNoAsmLabel) {
   default: break;
   case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -3720,10 +3733,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest = EmitPointerWithAlignment(E->getArg(0));
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 Value *SizeVal = EmitScalarExpr(E->getArg(2));
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
 if (BuiltinID == Builtin::BImempcpy ||
 BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3738,10 +3749,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Src = EmitPointerWithAlignment(E->getArg(1));
 uint64_t Size =
 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
-EmitNonNullArgCheck(RValue::get(Dest.getPointer()), 
E->getArg(0)->getType(),
-E->getArg(0)->getExprLoc(), FD, 0);
-EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-E->getArg(1)->getExprLoc(), FD, 1);
+EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
 Builder.CreateMemCpyInline(Dest, Src, Size);
 return RValue::get(nullptr);
   }
@@ -3798,10 +3807,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 Address Dest =