[PATCH] D143128: [-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `(DRE.data() + any)`

2023-02-13 Thread Ziqing Luo via Phabricator via cfe-commits
ziqingluo-90 updated this revision to Diff 497057.
ziqingluo-90 added a comment.

Rebased.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

Files:
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
  clang/lib/Analysis/UnsafeBufferUsage.cpp
  
clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp

Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
===
--- /dev/null
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+int f(unsigned long, void *);
+
+[[clang::unsafe_buffer_usage]]
+int unsafe_f(unsigned long, void *);
+
+void address_to_integer(int x) {
+  int * p = new int[10];
+  unsigned long n = (unsigned long) [5];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + 5)"
+  unsigned long m = (unsigned long) [x];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + x)"
+}
+
+void call_argument(int x) {
+  int * p = new int[10];
+
+  f((unsigned long) [5], [x]);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:21-[[@LINE-1]]:26}:"(p.data() + 5)"
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:28-[[@LINE-2]]:33}:"(p.data() + x)"
+}
+
+void ignore_unsafe_calls(int x) {
+  // Cannot fix `[x]` for now as it is an argument of an unsafe
+  // call. So no fix for variable `p`.
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_f((unsigned long) [5],
+	   // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+	   [x]);
+
+  int * q = new int[10];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::span q"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"{"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:24}:", 10}"
+  unsafe_f((unsigned long) [5],
+	   // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:28-[[@LINE-1]]:33}:"(q.data() + 5)"
+	   (void*)0);
+}
+
+void odd_subscript_form() {
+  int * p = new int[10];
+  unsigned long n = (unsigned long) &5[p];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + 5)"
+}
+
+// To test multiple function declarations, each of which carries
+// different incomplete informations:
+[[clang::unsafe_buffer_usage]]
+void unsafe_g(void*);
+
+void unsafe_g(void*);
+
+void multiple_unsafe_fundecls() {
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_g([5]);
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+}
+
+void unsafe_h(void*);
+
+[[clang::unsafe_buffer_usage]]
+void unsafe_h(void*);
+
+void unsafe_h(void* p) { ((char*)p)[10]; }
+
+void multiple_unsafe_fundecls2() {
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_h([5]);
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+}
Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -117,6 +117,15 @@
   bool Matches;
 };
 
+// Because we're dealing with raw pointers, let's define what we mean by that.
+static auto hasPointerType() {
+return hasType(hasCanonicalType(pointerType()));
+}
+
+static auto hasArrayType() {
+return hasType(hasCanonicalType(arrayType()));
+}
+
 AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher, innerMatcher) {
   const DynTypedMatcher  = static_cast(innerMatcher);
   
@@ -149,6 +158,29 @@
 ));
 // clang-format off
 }
+
+// Returns a matcher that matches any expression `e` such that `InnerMatcher`
+// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
+static internal::Matcher
+isInUnspecifiedPointerContext(internal::Matcher InnerMatcher) {
+  // A UPC can be
+  // 1. an argument of a function call (except the callee has [[unsafe_...]]
+  // attribute), or
+  // 2. the operand of a cast operation; or
+  // ...
+  auto CallArgMatcher =
+  callExpr(hasAnyArgument(allOf(
+   hasPointerType() /* array also decays to pointer type*/,
+   InnerMatcher)),
+   unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage);
+  auto CastOperandMatcher =
+  explicitCastExpr(hasCastKind(CastKind::CK_PointerToIntegral),
+   castSubExpr(allOf(hasPointerType(), InnerMatcher)));
+
+ return stmt(anyOf(CallArgMatcher, CastOperandMatcher));
+  // FIXME: any more cases? (UPC excludes the RHS of an assignment.  For now we
+  // don't have to check that.)
+}
 } // namespace clang::ast_matchers
 
 namespace {
@@ -163,15 +195,6 @@
 class Strategy;
 } // namespace
 
-// Because we're dealing with raw pointers, let's define what we mean by that.
-static auto hasPointerType() {
-return 

[PATCH] D143128: [-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `(DRE.data() + any)`

2023-02-06 Thread Jan Korous via Phabricator via cfe-commits
jkorous added inline comments.



Comment at: clang/lib/Analysis/UnsafeBufferUsage.cpp:162
+   InnerMatcher)),
+   unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage);
+  auto CastOperandMatcher =

ziqingluo-90 wrote:
> jkorous wrote:
> > I am just wondering how does the callee matcher work in situation with 
> > multiple re-declarations 樂 
> > 
> > Something like this:
> > ```
> > void foo(int* ptr);
> > [[clang::unsafe_buffer_usage]] void foo(int* ptr);
> > void foo(int* ptr);
> > 
> > void bar(int* ptr) {
> >   foo(ptr);
> > }
> > ```
> I think we are fine.  According to the doc of `FunctionDecl`:
> ```
> /// Represents a function declaration or definition.
> ///
> /// Since a given function can be declared several times in a program,
> /// there may be several FunctionDecls that correspond to that
> /// function. Only one of those FunctionDecls will be found when
> /// traversing the list of declarations in the context of the
> /// FunctionDecl (e.g., the translation unit); this FunctionDecl
> /// contains all of the information known about the function. Other,
> /// previous declarations of the function are available via the
> /// getPreviousDecl() chain.
> ```
I see! Sound like we should be fine indeed and the test seems to confirm.
Thank you!



Comment at: clang/lib/Analysis/UnsafeBufferUsage.cpp:508
+  hasUnaryOperand(arraySubscriptExpr(
+  hasBase(ignoringParenImpCasts(declRefExpr())
+.bind(UPCAddressofArraySubscriptTag);

ziqingluo-90 wrote:
> jkorous wrote:
> > I am wondering what will happen in the weird corner-case of `&5[ptr]` - I 
> > feel the Fix-It we produce would be incorrect.
> > 
> > Here's a suggestion - we could use `hasLHS` instead of `hasBase` here and 
> > add a FIXME that when we find the time we should also properly support the 
> > corner-case. That would be a pretty low-priority though - we definitely 
> > have more important patterns to support first.
> > 
> > WDYT?
> > 
> I'm not sure if I understand your concern.  For `&5[ptr]`, we will generate a 
> fix-it `ptr.data() + 5` in cases `ptr` is assigned a `span` strategy.   It is 
> same as the case of `[5]`.
Oh, my bad! I assumed (AKA didn't check) that we're just replacing the parts of 
the code around the DRE and index.
You're right. Please ignore me :)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143128: [-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `(DRE.data() + any)`

2023-02-06 Thread Ziqing Luo via Phabricator via cfe-commits
ziqingluo-90 updated this revision to Diff 495325.
ziqingluo-90 retitled this revision from "[-Wunsafe-buffer-usage][WIP] Fix-Its 
transforming `[any]` to `DRE.data() + any`" to 
"[-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `(DRE.data() 
+ any)`".
ziqingluo-90 added a comment.

Let `fixUPCAddressofArraySubscriptWithSpan` return `std::nullopt` instead of an 
empty list when we should give up on the fix-it.

Add a few test cases for some corner cases.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

Files:
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
  clang/lib/Analysis/UnsafeBufferUsage.cpp
  
clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp

Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
===
--- /dev/null
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+int f(unsigned long, void *);
+
+[[clang::unsafe_buffer_usage]]
+int unsafe_f(unsigned long, void *);
+
+void address_to_integer(int x) {
+  int * p = new int[10];
+  unsigned long n = (unsigned long) [5];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + 5)"
+  unsigned long m = (unsigned long) [x];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + x)"
+}
+
+void call_argument(int x) {
+  int * p = new int[10];
+
+  f((unsigned long) [5], [x]);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:21-[[@LINE-1]]:26}:"(p.data() + 5)"
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:28-[[@LINE-2]]:33}:"(p.data() + x)"
+}
+
+void ignore_unsafe_calls(int x) {
+  // Cannot fix `[x]` for now as it is an argument of an unsafe
+  // call. So no fix for variable `p`.
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_f((unsigned long) [5],
+	   // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+	   [x]);
+
+  int * q = new int[10];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::span q"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"{"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:24}:", 10}"
+  unsafe_f((unsigned long) [5],
+	   // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:28-[[@LINE-1]]:33}:"(q.data() + 5)"
+	   (void*)0);
+}
+
+void odd_subscript_form() {
+  int * p = new int[10];
+  unsigned long n = (unsigned long) &5[p];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + 5)"
+}
+
+// To test multiple function declarations, each of which carries
+// different incomplete informations:
+[[clang::unsafe_buffer_usage]]
+void unsafe_g(void*);
+
+void unsafe_g(void*);
+
+void multiple_unsafe_fundecls() {
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_g([5]);
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+}
+
+void unsafe_h(void*);
+
+[[clang::unsafe_buffer_usage]]
+void unsafe_h(void*);
+
+void unsafe_h(void* p) { ((char*)p)[10]; }
+
+void multiple_unsafe_fundecls2() {
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_h([5]);
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+}
Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -110,6 +110,15 @@
   bool Matches;
 };
 
+// Because we're dealing with raw pointers, let's define what we mean by that.
+static auto hasPointerType() {
+return hasType(hasCanonicalType(pointerType()));
+}
+
+static auto hasArrayType() {
+return hasType(hasCanonicalType(arrayType()));
+}
+
 AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher, innerMatcher) {
   const DynTypedMatcher  = static_cast(innerMatcher);
   
@@ -129,6 +138,29 @@
   castSubExpr(innerMatcher));
   // FIXME: add assignmentTo context...
 }
+
+// Returns a matcher that matches any expression `e` such that `InnerMatcher`
+// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
+static internal::Matcher
+isInUnspecifiedPointerContext(internal::Matcher InnerMatcher) {
+  // A UPC can be
+  // 1. an argument of a function call (except the callee has [[unsafe_...]]
+  // attribute), or
+  // 2. the operand of a cast operation; or
+  // ...
+  auto CallArgMatcher =
+  callExpr(hasAnyArgument(allOf(
+   hasPointerType() /* array also decays to pointer type*/,
+   InnerMatcher)),
+   unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage);
+  auto CastOperandMatcher =
+  explicitCastExpr(hasCastKind(CastKind::CK_PointerToIntegral),
+   castSubExpr(allOf(hasPointerType(), 

[PATCH] D143128: [-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `DRE.data() + any`

2023-02-06 Thread Ziqing Luo via Phabricator via cfe-commits
ziqingluo-90 added a comment.

In D143128#4108375 , @NoQ wrote:

> Why do we prefer `DRE.data() + any` to `()[any]`? It could be much 
> less intrusive this way, and the safety guarantees are the same.

It is actually `(DRE.data() + any)` versus `()[any]`. Are they quite 
the same in terms of being intrusive?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143128: [-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `DRE.data() + any`

2023-02-06 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

Why do we prefer `DRE.data() + any` to `()[any]`? It could be much 
less intrusive this way, and the safety guarantees are the same.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143128: [-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `DRE.data() + any`

2023-02-06 Thread Ziqing Luo via Phabricator via cfe-commits
ziqingluo-90 added inline comments.



Comment at: clang/lib/Analysis/UnsafeBufferUsage.cpp:162
+   InnerMatcher)),
+   unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage);
+  auto CastOperandMatcher =

jkorous wrote:
> I am just wondering how does the callee matcher work in situation with 
> multiple re-declarations 樂 
> 
> Something like this:
> ```
> void foo(int* ptr);
> [[clang::unsafe_buffer_usage]] void foo(int* ptr);
> void foo(int* ptr);
> 
> void bar(int* ptr) {
>   foo(ptr);
> }
> ```
I think we are fine.  According to the doc of `FunctionDecl`:
```
/// Represents a function declaration or definition.
///
/// Since a given function can be declared several times in a program,
/// there may be several FunctionDecls that correspond to that
/// function. Only one of those FunctionDecls will be found when
/// traversing the list of declarations in the context of the
/// FunctionDecl (e.g., the translation unit); this FunctionDecl
/// contains all of the information known about the function. Other,
/// previous declarations of the function are available via the
/// getPreviousDecl() chain.
```


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143128: [-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `DRE.data() + any`

2023-02-06 Thread Ziqing Luo via Phabricator via cfe-commits
ziqingluo-90 added inline comments.



Comment at: clang/lib/Analysis/UnsafeBufferUsage.cpp:508
+  hasUnaryOperand(arraySubscriptExpr(
+  hasBase(ignoringParenImpCasts(declRefExpr())
+.bind(UPCAddressofArraySubscriptTag);

jkorous wrote:
> I am wondering what will happen in the weird corner-case of `&5[ptr]` - I 
> feel the Fix-It we produce would be incorrect.
> 
> Here's a suggestion - we could use `hasLHS` instead of `hasBase` here and add 
> a FIXME that when we find the time we should also properly support the 
> corner-case. That would be a pretty low-priority though - we definitely have 
> more important patterns to support first.
> 
> WDYT?
> 
I'm not sure if I understand your concern.  For `&5[ptr]`, we will generate a 
fix-it `ptr.data() + 5` in cases `ptr` is assigned a `span` strategy.   It is 
same as the case of `[5]`.



Comment at: clang/lib/Analysis/UnsafeBufferUsage.cpp:810
+  case Strategy::Kind::Span:
+return fixUPCAddressofArraySubscriptWithSpan(Node);
+  case Strategy::Kind::Wontfix:

jkorous wrote:
> Since we use `std::nullopt` in `getFixits` to signal errors - we should 
> either use the same strategy in `fixUPCAddressofArraySubscriptWithSpan` or 
> translate the empty return value from it to `nullopt` here.
> (FWIWI I am leaning towards the former.)
> Forwarding the empty Fix-It would be incorrect.
> 
> 
Oh, that's a bug I made!  Thank you for finding it for me.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143128: [-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `DRE.data() + any`

2023-02-06 Thread Jan Korous via Phabricator via cfe-commits
jkorous added inline comments.



Comment at: clang/lib/Analysis/UnsafeBufferUsage.cpp:162
+   InnerMatcher)),
+   unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage);
+  auto CastOperandMatcher =

I am just wondering how does the callee matcher work in situation with multiple 
re-declarations 樂 

Something like this:
```
void foo(int* ptr);
[[clang::unsafe_buffer_usage]] void foo(int* ptr);
void foo(int* ptr);

void bar(int* ptr) {
  foo(ptr);
}
```



Comment at: clang/lib/Analysis/UnsafeBufferUsage.cpp:508
+  hasUnaryOperand(arraySubscriptExpr(
+  hasBase(ignoringParenImpCasts(declRefExpr())
+.bind(UPCAddressofArraySubscriptTag);

I am wondering what will happen in the weird corner-case of `&5[ptr]` - I feel 
the Fix-It we produce would be incorrect.

Here's a suggestion - we could use `hasLHS` instead of `hasBase` here and add a 
FIXME that when we find the time we should also properly support the 
corner-case. That would be a pretty low-priority though - we definitely have 
more important patterns to support first.

WDYT?




Comment at: clang/lib/Analysis/UnsafeBufferUsage.cpp:810
+  case Strategy::Kind::Span:
+return fixUPCAddressofArraySubscriptWithSpan(Node);
+  case Strategy::Kind::Wontfix:

Since we use `std::nullopt` in `getFixits` to signal errors - we should either 
use the same strategy in `fixUPCAddressofArraySubscriptWithSpan` or translate 
the empty return value from it to `nullopt` here.
(FWIWI I am leaning towards the former.)
Forwarding the empty Fix-It would be incorrect.




CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143128: [-Wunsafe-buffer-usage][WIP] Fix-Its transforming `[any]` to `DRE.data() + any`

2023-02-03 Thread Ziqing Luo via Phabricator via cfe-commits
ziqingluo-90 updated this revision to Diff 494711.
ziqingluo-90 retitled this revision from "[-Wunsafe-buffer-usage][WIP] Fix-Its 
transforming `[*]` to `DRE.data() + *`" to "[-Wunsafe-buffer-usage][WIP] 
Fix-Its transforming `[any]` to `DRE.data() + any`".
ziqingluo-90 edited the summary of this revision.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

Files:
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
  clang/lib/Analysis/UnsafeBufferUsage.cpp
  
clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp

Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
===
--- /dev/null
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+int f(unsigned long, void *);
+
+[[clang::unsafe_buffer_usage]]
+int unsafe_f(unsigned long, void *);
+
+void address_to_integer(int x) {
+  int * p = new int[10];
+  unsigned long n = (unsigned long) [5];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + 5)"
+  unsigned long m = (unsigned long) [x];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + x)"
+}
+
+void call_argument(int x) {
+  int * p = new int[10];
+
+  f((unsigned long) [5], [x]);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:21-[[@LINE-1]]:26}:"(p.data() + 5)"
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:28-[[@LINE-2]]:33}:"(p.data() + x)"
+}
+
+void ignore_unsafe_calls(int x) {
+  // Cannot fix `[x]` for now as it is an argument of an unsafe
+  // call. So no fix for variable `p`.
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_f((unsigned long) [5],
+	   // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+	   [x]);
+
+  int * q = new int[10];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::span q"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"{"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:24}:", 10}"
+  unsafe_f((unsigned long) [5],
+	   // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:28-[[@LINE-1]]:33}:"(q.data() + 5)"
+	   (void*)0);
+}
Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -20,6 +20,15 @@
 using namespace ast_matchers;
 
 namespace clang::ast_matchers {
+// Because we're dealing with raw pointers, let's define what we mean by that.
+static auto hasPointerType() {
+return hasType(hasCanonicalType(pointerType()));
+}
+
+static auto hasArrayType() {
+return hasType(hasCanonicalType(arrayType()));
+}
+
 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
 // except for those belonging to a different callable of "n".
 class MatchDescendantVisitor
@@ -136,6 +145,29 @@
   castSubExpr(innerMatcher));
   // FIXME: add assignmentTo context...
 }
+
+// Returns a matcher that matches any expression `e` such that `InnerMatcher`
+// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
+static internal::Matcher
+isInUnspecifiedPointerContext(internal::Matcher InnerMatcher) {
+  // A UPC can be
+  // 1. an argument of a function call (except the callee has [[unsafe_...]]
+  // attribute), or
+  // 2. the operand of a cast operation; or
+  // ...
+  auto CallArgMatcher =
+  callExpr(hasAnyArgument(allOf(
+   hasPointerType() /* array also decays to pointer type*/,
+   InnerMatcher)),
+   unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage);
+  auto CastOperandMatcher =
+  explicitCastExpr(hasCastKind(CastKind::CK_PointerToIntegral),
+   castSubExpr(allOf(hasPointerType(), InnerMatcher)));
+
+  return stmt(anyOf(CallArgMatcher, CastOperandMatcher));
+  // FIXME: any more cases? (UPC excludes the RHS of an assignment.  For now we
+  // don't have to check that.)
+}
 } // namespace clang::ast_matchers
 
 namespace {
@@ -150,15 +182,6 @@
 class Strategy;
 } // namespace
 
-// Because we're dealing with raw pointers, let's define what we mean by that.
-static auto hasPointerType() {
-return hasType(hasCanonicalType(pointerType()));
-}
-
-static auto hasArrayType() {
-return hasType(hasCanonicalType(arrayType()));
-}
-
 namespace {
 /// Gadget is an individual operation in the code that may be of interest to
 /// this analysis. Each (non-abstract) subclass corresponds to a specific
@@ -456,6 +479,50 @@
 return {};
   }
 };
+
+// Represents expressions of the form `[any]` in the Unspecified Pointer
+// Context (see `isInUnspecifiedPointerContext`).
+// Note here `[]` is the built-in