[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)

2024-09-22 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 closed 
https://github.com/llvm/llvm-project/pull/109496
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)

2024-09-20 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/109496

>From e7f7f82b25eaae86623ac8f47731892b3b629d7d Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Fri, 20 Sep 2024 16:27:09 -0700
Subject: [PATCH 1/4] [-Wunsafe-buffer-usage] Fix a bug and suppress libc
 warnings for C files

- Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType
- Suppress -Wunsafe-buffer-usage-in-libc-call for C files

(rdar://117182250)
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +---
 clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +++-
 .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 +
 3 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index a16762244b1766..110a121e71a7d2 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
 
 AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *,
   Handler) {
-  return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  if (Finder->getASTContext().getLangOpts().CPlusPlus)
+return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  return true; /* Only warn about libc calls for C++ */
 }
 
 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) {
@@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
   if (!FristParmTy->isPointerType())
 return false; // possibly some user-defined printf function
 
-  QualType FirstPteTy = (cast(FristParmTy))->getPointeeType();
+  QualType FirstPteTy = FristParmTy->getAs()->getPointeeType();
 
   if (!Ctx.getFILEType()
.isNull() && //`FILE *` must be in the context if it is fprintf
@@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   if (!FirstParmTy->isPointerType())
 return false; // Not an snprint
 
-  QualType FirstPteTy = cast(FirstParmTy)->getPointeeType();
+  QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType();
   const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1);
 
   if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() ||
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 117b2c8bc57935..7e0b929abea683 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions();
 
   // UnsafeBufferUsage analysis settings.
+  bool IsCXXLang = S.getLangOpts().CPlusPlus;
   bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20;
   bool UnsafeBufferUsageShouldEmitSuggestions =  // Should != Can.
   UnsafeBufferUsageCanEmitSuggestions &&
@@ -2581,7 +2582,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) ||
   !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container,
SourceLocation()) ||
-  !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) {
+  (!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation()) 
&&
+   IsCXXLang)) {
 CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU);
   }
 }
diff --git 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
new file mode 100644
index 00..e305c3e140dff9
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s
+
+void* memcpy(void *dst,const void *src, unsigned long size);
+
+void f(int *p, int *q) {
+
+  memcpy(p, q, 10); // no libc warn in C
+  ++p[5];   // expected-warning{{unsafe buffer access}}
+}

>From 42664f03f16a99aa17bb33f473a01db73af796f5 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Fri, 20 Sep 2024 18:53:28 -0700
Subject: [PATCH 2/4] Add tests and fix a typo

---
 clang/lib/Analysis/UnsafeBufferUsage.cpp   |  6 +++---
 .../warn-unsafe-buffer-usage-libc-functions.cpp| 10 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 110a121e71a7d2..6c1979179711b9 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -786,12 +786,12 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
 return false; // possibly some user-defined printf function
 
   ASTContext &Ctx = Finder->getASTContext();
-  QualType FristParmTy = FD->getParamDecl(0)->getT

[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)

2024-09-20 Thread Ziqing Luo via cfe-commits


@@ -784,12 +786,12 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
 return false; // possibly some user-defined printf function
 
   ASTContext &Ctx = Finder->getASTContext();
-  QualType FristParmTy = FD->getParamDecl(0)->getType();
+  QualType FirstParmTy = FD->getParamDecl(0)->getType();
 
-  if (!FristParmTy->isPointerType())
+  if (!FirstParmTy->isPointerType())
 return false; // possibly some user-defined printf function
 
-  QualType FirstPteTy = (cast(FristParmTy))->getPointeeType();
+  QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType();

ziqingluo-90 wrote:

The difference between `castAs` and `getAs` is that `castAs` asserts the 
canonical type is `PointerType` while `getAs` returns null if not.  I think 
`castAs` is more suitable for our case.

https://github.com/llvm/llvm-project/pull/109496
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)

2024-09-20 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/109496

>From e7f7f82b25eaae86623ac8f47731892b3b629d7d Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Fri, 20 Sep 2024 16:27:09 -0700
Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Fix a bug and suppress libc
 warnings for C files

- Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType
- Suppress -Wunsafe-buffer-usage-in-libc-call for C files

(rdar://117182250)
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +---
 clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +++-
 .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 +
 3 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index a16762244b1766..110a121e71a7d2 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
 
 AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *,
   Handler) {
-  return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  if (Finder->getASTContext().getLangOpts().CPlusPlus)
+return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  return true; /* Only warn about libc calls for C++ */
 }
 
 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) {
@@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
   if (!FristParmTy->isPointerType())
 return false; // possibly some user-defined printf function
 
-  QualType FirstPteTy = (cast(FristParmTy))->getPointeeType();
+  QualType FirstPteTy = FristParmTy->getAs()->getPointeeType();
 
   if (!Ctx.getFILEType()
.isNull() && //`FILE *` must be in the context if it is fprintf
@@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   if (!FirstParmTy->isPointerType())
 return false; // Not an snprint
 
-  QualType FirstPteTy = cast(FirstParmTy)->getPointeeType();
+  QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType();
   const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1);
 
   if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() ||
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 117b2c8bc57935..7e0b929abea683 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions();
 
   // UnsafeBufferUsage analysis settings.
+  bool IsCXXLang = S.getLangOpts().CPlusPlus;
   bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20;
   bool UnsafeBufferUsageShouldEmitSuggestions =  // Should != Can.
   UnsafeBufferUsageCanEmitSuggestions &&
@@ -2581,7 +2582,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) ||
   !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container,
SourceLocation()) ||
-  !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) {
+  (!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation()) 
&&
+   IsCXXLang)) {
 CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU);
   }
 }
diff --git 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
new file mode 100644
index 00..e305c3e140dff9
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s
+
+void* memcpy(void *dst,const void *src, unsigned long size);
+
+void f(int *p, int *q) {
+
+  memcpy(p, q, 10); // no libc warn in C
+  ++p[5];   // expected-warning{{unsafe buffer access}}
+}

>From 42664f03f16a99aa17bb33f473a01db73af796f5 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Fri, 20 Sep 2024 18:53:28 -0700
Subject: [PATCH 2/3] Add tests and fix a typo

---
 clang/lib/Analysis/UnsafeBufferUsage.cpp   |  6 +++---
 .../warn-unsafe-buffer-usage-libc-functions.cpp| 10 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 110a121e71a7d2..6c1979179711b9 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -786,12 +786,12 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
 return false; // possibly some user-defined printf function
 
   ASTContext &Ctx = Finder->getASTContext();
-  QualType FristParmTy = FD->getParamDecl(0)->getT

[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)

2024-09-20 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/109496

>From e7f7f82b25eaae86623ac8f47731892b3b629d7d Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Fri, 20 Sep 2024 16:27:09 -0700
Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Fix a bug and suppress libc
 warnings for C files

- Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType
- Suppress -Wunsafe-buffer-usage-in-libc-call for C files

(rdar://117182250)
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +---
 clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +++-
 .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 +
 3 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index a16762244b1766..110a121e71a7d2 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
 
 AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *,
   Handler) {
-  return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  if (Finder->getASTContext().getLangOpts().CPlusPlus)
+return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  return true; /* Only warn about libc calls for C++ */
 }
 
 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) {
@@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
   if (!FristParmTy->isPointerType())
 return false; // possibly some user-defined printf function
 
-  QualType FirstPteTy = (cast(FristParmTy))->getPointeeType();
+  QualType FirstPteTy = FristParmTy->getAs()->getPointeeType();
 
   if (!Ctx.getFILEType()
.isNull() && //`FILE *` must be in the context if it is fprintf
@@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   if (!FirstParmTy->isPointerType())
 return false; // Not an snprint
 
-  QualType FirstPteTy = cast(FirstParmTy)->getPointeeType();
+  QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType();
   const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1);
 
   if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() ||
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 117b2c8bc57935..7e0b929abea683 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions();
 
   // UnsafeBufferUsage analysis settings.
+  bool IsCXXLang = S.getLangOpts().CPlusPlus;
   bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20;
   bool UnsafeBufferUsageShouldEmitSuggestions =  // Should != Can.
   UnsafeBufferUsageCanEmitSuggestions &&
@@ -2581,7 +2582,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) ||
   !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container,
SourceLocation()) ||
-  !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) {
+  (!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation()) 
&&
+   IsCXXLang)) {
 CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU);
   }
 }
diff --git 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
new file mode 100644
index 00..e305c3e140dff9
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s
+
+void* memcpy(void *dst,const void *src, unsigned long size);
+
+void f(int *p, int *q) {
+
+  memcpy(p, q, 10); // no libc warn in C
+  ++p[5];   // expected-warning{{unsafe buffer access}}
+}

>From 42664f03f16a99aa17bb33f473a01db73af796f5 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Fri, 20 Sep 2024 18:53:28 -0700
Subject: [PATCH 2/2] Add tests and fix a typo

---
 clang/lib/Analysis/UnsafeBufferUsage.cpp   |  6 +++---
 .../warn-unsafe-buffer-usage-libc-functions.cpp| 10 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 110a121e71a7d2..6c1979179711b9 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -786,12 +786,12 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
 return false; // possibly some user-defined printf function
 
   ASTContext &Ctx = Finder->getASTContext();
-  QualType FristParmTy = FD->getParamDecl(0)->getT

[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)

2024-09-20 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/109496

>From e7f7f82b25eaae86623ac8f47731892b3b629d7d Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Fri, 20 Sep 2024 16:27:09 -0700
Subject: [PATCH] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings
 for C files

- Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType
- Suppress -Wunsafe-buffer-usage-in-libc-call for C files

(rdar://117182250)
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +---
 clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +++-
 .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 +
 3 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index a16762244b1766..110a121e71a7d2 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
 
 AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *,
   Handler) {
-  return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  if (Finder->getASTContext().getLangOpts().CPlusPlus)
+return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  return true; /* Only warn about libc calls for C++ */
 }
 
 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) {
@@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
   if (!FristParmTy->isPointerType())
 return false; // possibly some user-defined printf function
 
-  QualType FirstPteTy = (cast(FristParmTy))->getPointeeType();
+  QualType FirstPteTy = FristParmTy->getAs()->getPointeeType();
 
   if (!Ctx.getFILEType()
.isNull() && //`FILE *` must be in the context if it is fprintf
@@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   if (!FirstParmTy->isPointerType())
 return false; // Not an snprint
 
-  QualType FirstPteTy = cast(FirstParmTy)->getPointeeType();
+  QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType();
   const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1);
 
   if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() ||
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 117b2c8bc57935..7e0b929abea683 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions();
 
   // UnsafeBufferUsage analysis settings.
+  bool IsCXXLang = S.getLangOpts().CPlusPlus;
   bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20;
   bool UnsafeBufferUsageShouldEmitSuggestions =  // Should != Can.
   UnsafeBufferUsageCanEmitSuggestions &&
@@ -2581,7 +2582,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) ||
   !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container,
SourceLocation()) ||
-  !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) {
+  (!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation()) 
&&
+   IsCXXLang)) {
 CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU);
   }
 }
diff --git 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
new file mode 100644
index 00..e305c3e140dff9
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s
+
+void* memcpy(void *dst,const void *src, unsigned long size);
+
+void f(int *p, int *q) {
+
+  memcpy(p, q, 10); // no libc warn in C
+  ++p[5];   // expected-warning{{unsafe buffer access}}
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings for C files (PR #109496)

2024-09-20 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 created 
https://github.com/llvm/llvm-project/pull/109496

- Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType (report 
by @hnrklssn 
[here](https://github.com/llvm/llvm-project/pull/101583#discussion_r1767705992))
- Suppress -Wunsafe-buffer-usage-in-libc-call for C files


>From e261352db529e221876e4a0d6b255a734d26336f Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Fri, 20 Sep 2024 16:27:09 -0700
Subject: [PATCH] [-Wunsafe-buffer-usage] Fix a bug and suppress libc warnings
 for C files

- Fix a bug in UnsafeBufferUsage.cpp related to casting to PointerType
- Suppress -Wunsafe-buffer-usage-in-libc-call for C files

(rdar://117182250)
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 8 +---
 clang/lib/Sema/AnalysisBasedWarnings.cpp | 6 --
 .../warn-unsafe-buffer-usage-no-libc-functions-in-c.c| 9 +
 3 files changed, 18 insertions(+), 5 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index a16762244b1766..110a121e71a7d2 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -250,7 +250,9 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
 
 AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *,
   Handler) {
-  return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  if (Finder->getASTContext().getLangOpts().CPlusPlus)
+return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc());
+  return true; /* Only warn about libc calls for C++ */
 }
 
 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher, innerMatcher) {
@@ -789,7 +791,7 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
   if (!FristParmTy->isPointerType())
 return false; // possibly some user-defined printf function
 
-  QualType FirstPteTy = (cast(FristParmTy))->getPointeeType();
+  QualType FirstPteTy = FristParmTy->getAs()->getPointeeType();
 
   if (!Ctx.getFILEType()
.isNull() && //`FILE *` must be in the context if it is fprintf
@@ -865,7 +867,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   if (!FirstParmTy->isPointerType())
 return false; // Not an snprint
 
-  QualType FirstPteTy = cast(FirstParmTy)->getPointeeType();
+  QualType FirstPteTy = FirstParmTy->getAs()->getPointeeType();
   const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1);
 
   if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() ||
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 117b2c8bc57935..724b543cdf882c 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2549,6 +2549,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions();
 
   // UnsafeBufferUsage analysis settings.
+  bool IsCXXLang = S.getLangOpts().CPlusPlus;
   bool UnsafeBufferUsageCanEmitSuggestions = S.getLangOpts().CPlusPlus20;
   bool UnsafeBufferUsageShouldEmitSuggestions =  // Should != Can.
   UnsafeBufferUsageCanEmitSuggestions &&
@@ -2567,8 +2568,9 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
  Node->getBeginLoc()) ||
 !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container,
  Node->getBeginLoc()) ||
-!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call,
- Node->getBeginLoc())) {
+(!Diags.isIgnored(diag::warn_unsafe_buffer_libc_call,
+  Node->getBeginLoc()) &&
+ IsCXXLang /* we only warn about libc calls in C++ files */)) {
   clang::checkUnsafeBufferUsage(Node, R,
 UnsafeBufferUsageShouldEmitSuggestions);
 }
diff --git 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
new file mode 100644
index 00..e305c3e140dff9
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-no-libc-functions-in-c.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -Wunsafe-buffer-usage %s -verify %s
+
+void* memcpy(void *dst,const void *src, unsigned long size);
+
+void f(int *p, int *q) {
+
+  memcpy(p, q, 10); // no libc warn in C
+  ++p[5];   // expected-warning{{unsafe buffer access}}
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings (PR #108308)

2024-09-13 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 closed 
https://github.com/llvm/llvm-project/pull/108308
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings (PR #108308)

2024-09-11 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/108308

>From 3c8830a0e69922faf4fad190ba0b2e01a3392e62 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Tue, 6 Aug 2024 17:54:23 -0700
Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Reduce false positives with
 constant arrays in libc warnings

For `snprintf(a, sizeof a, ...)`, the first two arguments form a safe
pattern if `a` is a constant array.  In such a case, this commit will
suppress the warning.

(rdar://117182250)
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 27 ++-
 ...arn-unsafe-buffer-usage-libc-functions.cpp | 14 +-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 21d4368151eb47..3451ba7cc34901 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -668,7 +668,7 @@ AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) {
 
   auto *II = Node.getIdentifier();
 
-  if (!II)
+ if (!II)
 return false;
 
   StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
@@ -833,9 +833,16 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
 //
 // For the first two arguments: `ptr` and `size`, they are safe if in the
 // following patterns:
+//
+// Pattern 1:
 //ptr := DRE.data();
 //size:= DRE.size()/DRE.size_bytes()
 // And DRE is a hardened container or view.
+//
+// Pattern 2:
+//ptr := Constant-Array-DRE;
+//size:= any expression that has compile-time constant value equivalent to
+//   sizeof (Constant-Array-DRE)
 AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   const FunctionDecl *FD = Node.getDirectCallee();
 
@@ -856,6 +863,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   !Size->getType()->isIntegerType())
 return false; // not an snprintf call
 
+  // Pattern 1:
   static StringRef SizedObjs[] = {"span", "array", "vector",
   "basic_string_view", "basic_string"};
   Buf = Buf->IgnoreParenImpCasts();
@@ -886,6 +894,23 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   SizedObj)
 return false; // It is in fact safe
 }
+
+  // Pattern 2:
+  if (auto *DRE = dyn_cast(Buf->IgnoreParenImpCasts())) {
+ASTContext &Ctx = Finder->getASTContext();
+
+if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) {
+  Expr::EvalResult ER;
+  // The array element type must be compatible with `char` otherwise an
+  // explicit cast will be needed, which will make this check unreachable.
+  // Therefore, the array extent is same as its' bytewise size.
+  if (Size->EvaluateAsConstantExpr(ER, Ctx)) {
+APSInt EVal = ER.Val.getInt(); // Size must have integer type
+
+return APSInt::compareValues(EVal, APSInt(CAT->getSize(), true)) != 0;
+  }
+}
+  }
   return true; // ptr and size are not in safe pattern
 }
 } // namespace libc_func_matchers
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index a3716073609f6f..8e777a7a51c994 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -83,6 +83,12 @@ void f(char * p, char * q, std::span s, 
std::span s2) {
   sscanf(p, "%s%d", "hello", *p);// expected-warning{{function 'sscanf' is 
unsafe}}
   sscanf_s(p, "%s%d", "hello", *p);  // expected-warning{{function 'sscanf_s' 
is unsafe}}
   fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // 
expected-warning{{function 'fprintf' is unsafe}} expected-note{{string argument 
is not guaranteed to be null-terminated}}
+
+  char a[10], b[11];
+  int c[10];
+
+  snprintf(a, sizeof(b), "%s", __PRETTY_FUNCTION__); // 
expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer 
and size may not match}}
+  snprintf((char*)c, sizeof(c), "%s", __PRETTY_FUNCTION__);  // 
expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer 
and size may not match}}
   fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // 
no warn
   printf("%s%d", "hello", *p); // no warn
   snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
@@ -93,13 +99,19 @@ void f(char * p, char * q, std::span s, 
std::span s2) {
   __asan_printf();// a printf but no argument, so no warn
 }
 
-void v(std::string s1, int *p) {
+void safe_examples(std::string s1, int *p) {
   snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, 
"hello", p, s1.c_str()); // no warn
   snprintf(s1.data(), s1.size_bytes(), s1.c_str(), __PRETTY_FUNCTION__, *p, 
"hello", s1.c_str());  // no warn
   printf("%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str());   
   // no warn
   printf(s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str());   

[clang] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings (PR #108308)

2024-09-11 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 edited 
https://github.com/llvm/llvm-project/pull/108308
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Reduce false positives with constant arrays in libc warnings (PR #108308)

2024-09-11 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 created 
https://github.com/llvm/llvm-project/pull/108308

For `snprintf(a, sizeof a, ...)`, the first two arguments form a safe pattern 
if `a` is a constant array.  In such a case, this commit will suppress the 
warning.

(rdar://117182250)

This pattern will effectively remove a lot of false positives in our downstream 
projects.

>From 3c8830a0e69922faf4fad190ba0b2e01a3392e62 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Tue, 6 Aug 2024 17:54:23 -0700
Subject: [PATCH] [-Wunsafe-buffer-usage] Reduce false positives with constant
 arrays in libc warnings

For `snprintf(a, sizeof a, ...)`, the first two arguments form a safe
pattern if `a` is a constant array.  In such a case, this commit will
suppress the warning.

(rdar://117182250)
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 27 ++-
 ...arn-unsafe-buffer-usage-libc-functions.cpp | 14 +-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 21d4368151eb47..3451ba7cc34901 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -668,7 +668,7 @@ AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) {
 
   auto *II = Node.getIdentifier();
 
-  if (!II)
+ if (!II)
 return false;
 
   StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
@@ -833,9 +833,16 @@ AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg,
 //
 // For the first two arguments: `ptr` and `size`, they are safe if in the
 // following patterns:
+//
+// Pattern 1:
 //ptr := DRE.data();
 //size:= DRE.size()/DRE.size_bytes()
 // And DRE is a hardened container or view.
+//
+// Pattern 2:
+//ptr := Constant-Array-DRE;
+//size:= any expression that has compile-time constant value equivalent to
+//   sizeof (Constant-Array-DRE)
 AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   const FunctionDecl *FD = Node.getDirectCallee();
 
@@ -856,6 +863,7 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   !Size->getType()->isIntegerType())
 return false; // not an snprintf call
 
+  // Pattern 1:
   static StringRef SizedObjs[] = {"span", "array", "vector",
   "basic_string_view", "basic_string"};
   Buf = Buf->IgnoreParenImpCasts();
@@ -886,6 +894,23 @@ AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) {
   SizedObj)
 return false; // It is in fact safe
 }
+
+  // Pattern 2:
+  if (auto *DRE = dyn_cast(Buf->IgnoreParenImpCasts())) {
+ASTContext &Ctx = Finder->getASTContext();
+
+if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) {
+  Expr::EvalResult ER;
+  // The array element type must be compatible with `char` otherwise an
+  // explicit cast will be needed, which will make this check unreachable.
+  // Therefore, the array extent is same as its' bytewise size.
+  if (Size->EvaluateAsConstantExpr(ER, Ctx)) {
+APSInt EVal = ER.Val.getInt(); // Size must have integer type
+
+return APSInt::compareValues(EVal, APSInt(CAT->getSize(), true)) != 0;
+  }
+}
+  }
   return true; // ptr and size are not in safe pattern
 }
 } // namespace libc_func_matchers
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index a3716073609f6f..8e777a7a51c994 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -83,6 +83,12 @@ void f(char * p, char * q, std::span s, 
std::span s2) {
   sscanf(p, "%s%d", "hello", *p);// expected-warning{{function 'sscanf' is 
unsafe}}
   sscanf_s(p, "%s%d", "hello", *p);  // expected-warning{{function 'sscanf_s' 
is unsafe}}
   fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // 
expected-warning{{function 'fprintf' is unsafe}} expected-note{{string argument 
is not guaranteed to be null-terminated}}
+
+  char a[10], b[11];
+  int c[10];
+
+  snprintf(a, sizeof(b), "%s", __PRETTY_FUNCTION__); // 
expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer 
and size may not match}}
+  snprintf((char*)c, sizeof(c), "%s", __PRETTY_FUNCTION__);  // 
expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer 
and size may not match}}
   fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // 
no warn
   printf("%s%d", "hello", *p); // no warn
   snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
@@ -93,13 +99,19 @@ void f(char * p, char * q, std::span s, 
std::span s2) {
   __asan_printf();// a printf but no argument, so no warn
 }
 
-void v(std::string s1, int *p) {
+void safe_examples(std::string s1, int *p) {
   snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, 
"hello", p, s1.c_str()); // no warn
   snprintf(s

[clang] [WIP][-Wunsafe-buffer-usage] Add a new option for unsafe buffer warnings on libc functions (PR #105383)

2024-09-11 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 closed 
https://github.com/llvm/llvm-project/pull/105383
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-09-06 Thread Ziqing Luo via cfe-commits

ziqingluo-90 wrote:

> Btw a question about the new warning: So with 
> -Wunsafe-buffer-usage-in-libc-call clang now warns on the following?
> 
> ```
> #include 
> 
> void foo(void) {
>   char q[10];
>   snprintf(q, 10, "%s", "hello");
> }
> ```
> 
> It says
> 
> ```
> foo.c:5:3: warning: function 'snprintf' is unsafe 
> [-Wunsafe-buffer-usage-in-libc-call]
> 5 |   snprintf(q, 10, "%s", "hello");
>   |   ^~
> foo.c:5:12: note:  buffer pointer and size may not match
> 5 |   snprintf(q, 10, "%s", "hello");
>   |^
> 1 warning generated.
> ```
> 
> Is that as expected? If so, how should snprintf be used to avoid the warning?

Yes, this is expected.  According to the C++ Safe Buffers programming model, 
buffer pointers should be changed to `std::span`.   Then  
`snprintf(span.data(), span.size(), ...)` is considered safe and will not be 
warned.  We may also allow the use of the form `snprintf(span.first(10).data(), 
10, ...)` later.

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-09-06 Thread Ziqing Luo via cfe-commits

ziqingluo-90 wrote:

> > We're seeing a crash with this patch when compiling with -Weverything.
> > ```
> > clang: ../../clang/include/clang/AST/Expr.h:3026: const clang::Expr 
> > *clang::CallExpr::getArg(unsigned int) const: Assertion `Arg < getNumArgs() 
> > && "Arg access out of range!"' failed.
> > ```
> > 
> > 
> > 
> >   
> > 
> > 
> >   
> > 
> > 
> > 
> >   
> > I'm working on extracting a reproducer.
> 
> `clang -c -Weverything bbi-98867.c` with bbi-98867.c being just
> 
> ```
> void printf() { printf(); }
> ```
> 
> (I've creduced the reproducer, I can't share the full one)

Thanks for finding the bug and making a reproducer.  It has been fixed in 
de88d7db7b77141297fbb5638ee1e18d1fba53b8.

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-09-05 Thread Ziqing Luo via cfe-commits

ziqingluo-90 wrote:

> There needs to be a flag to opt out of this to not break everybody who is 
> currently using unsafe-buffer-usage. #105383 seems to do that, but it really 
> should be in this same PR. Can this be reverted and relanded with the flag?

@aeubanks  you should be able to suppress the new warning with 
`-Wno-unsafe-buffer-usage-in-libc-call`

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-09-04 Thread Ziqing Luo via cfe-commits

ziqingluo-90 wrote:

> There needs to be a flag to opt out of this to not break everybody who is 
> currently using unsafe-buffer-usage. #105383 seems to do that, but it really 
> should be in this same PR. Can this be reverted and relanded with the flag?

ok, will do

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-09-04 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 closed 
https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-09-03 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 edited 
https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-28 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/5] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |  12 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   7 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 408 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  12 +
 ...-usage-libc-functions-inline-namespace.cpp |  60 +++
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  86 
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 8 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e115..3e0fae6db7562d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo
+  ///  is 0 if the callee function is not a member of the printf family;
+  ///  is 1 if the callee is `sprintf`;
+  ///  is 2 if arguments of the call have `__size_by` relation but are not in a
+  ///  safe pattern;
+  ///  is 3 if string arguments do not guarantee null-termination
+  ///  is 4 if the callee takes va_list
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b9..ac01b285ae833b 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..7e1e3686ce6554 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer 
pointer and size may not match"
+  "| use 'std::string::c_str' or string literal as string pointer to 
guarantee null-termination"
+  "| do not use va_list that cannot be checked at compile-time for 
bounds safety}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..751fb75f6ed602 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,24 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclC

[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-28 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 edited 
https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-28 Thread Ziqing Luo via cfe-commits


@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer 
pointer and size may not match"
+  "| use 'std::string::c_str' or string literal as string pointer to 
guarantee null-termination"

ziqingluo-90 wrote:

> I can also imagine that for some functions it won't be as simple as saying 
> "pointer parameter 1, 3 and 5" as it could be interplay between pointers, 
> integer parameters and buffer content.

Not sure if I understand your concern.  Could you give an example?

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-28 Thread Ziqing Luo via cfe-commits

ziqingluo-90 wrote:

Addressed comments.

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-28 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/4] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |  12 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   7 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 408 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  12 +
 ...-usage-libc-functions-inline-namespace.cpp |  60 +++
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  86 
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 8 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e115..3e0fae6db7562d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo
+  ///  is 0 if the callee function is not a member of the printf family;
+  ///  is 1 if the callee is `sprintf`;
+  ///  is 2 if arguments of the call have `__size_by` relation but are not in a
+  ///  safe pattern;
+  ///  is 3 if string arguments do not guarantee null-termination
+  ///  is 4 if the callee takes va_list
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b9..ac01b285ae833b 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..7e1e3686ce6554 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer 
pointer and size may not match"
+  "| use 'std::string::c_str' or string literal as string pointer to 
guarantee null-termination"
+  "| do not use va_list that cannot be checked at compile-time for 
bounds safety}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..751fb75f6ed602 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,24 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclC

[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-28 Thread Ziqing Luo via cfe-commits


@@ -0,0 +1,101 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN:-verify %s
+
+typedef struct {} FILE;
+void memcpy();
+void __asan_memcpy();
+void strcpy();
+void strcpy_s();
+void wcscpy_s();
+unsigned strlen( const char* str );
+int fprintf( FILE* stream, const char* format, ... );
+int printf( const char* format, ... );
+int sprintf( char* buffer, const char* format, ... );
+int swprintf( char* buffer, const char* format, ... );
+int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... );
+int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int sscanf_s(const char * buffer, const char * format, ...);
+int sscanf(const char * buffer, const char * format, ... );
+
+namespace std {
+  template< class InputIt, class OutputIt >
+  OutputIt copy( InputIt first, InputIt last,
+OutputIt d_first );
+
+  struct iterator{};
+  template
+  struct span {
+T * ptr;
+T * data();
+unsigned size_bytes();
+unsigned size();
+iterator begin() const noexcept;
+iterator end() const noexcept;
+  };
+
+  template
+  struct basic_string {
+T* p;
+T *c_str();
+T *data();
+unsigned size_bytes();
+  };
+
+  typedef basic_string string;
+  typedef basic_string wstring;
+
+  // C function under std:
+  void memcpy();
+  void strcpy();
+}
+
+void f(char * p, char * q, std::span s, std::span s2) {
+  memcpy();   // expected-warning{{function 'memcpy' 
introduces unsafe buffer access}}
+  std::memcpy();  // expected-warning{{function 'memcpy' 
introduces unsafe buffer access}}
+  __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' 
introduces unsafe buffer access}}
+  __builtin___memcpy_chk(p, q, 8, 64);  // expected-warning{{function 
'__builtin___memcpy_chk' introduces unsafe buffer access}}
+  __asan_memcpy();  // expected-warning{{function 
'__asan_memcpy' introduces unsafe buffer access}}
+  strcpy();   // expected-warning{{function 'strcpy' 
introduces unsafe buffer access}}
+  std::strcpy();  // expected-warning{{function 'strcpy' 
introduces unsafe buffer access}}
+  strcpy_s(); // expected-warning{{function 'strcpy_s' 
introduces unsafe buffer access}}
+  wcscpy_s(); // expected-warning{{function 'wcscpy_s' 
introduces unsafe buffer access}}
+
+
+  /* Test printfs */
+  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function 'fprintf' 
introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or 
string literal as string pointer to guarantee null-termination}}
+  printf("%s%d", p, *p);  // expected-warning{{function 'printf' introduces 
unsafe buffer access}} expected-note{{use 'std::string::c_str' or string 
literal as string pointer to guarantee null-termination}}
+  sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' 
introduces unsafe buffer access}} expected-note{{change to 'snprintf' for 
explicit bounds checking}}
+  swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' 
introduces unsafe buffer access}} expected-note{{change to 'snprintf' for 
explicit bounds checking}}
+  snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 
'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and 
size may not match}}

ziqingluo-90 wrote:

yep, this case actually happens a lot in our downstream projects.   I planed to 
handle it in a follow-up patch: https://github.com/llvm/llvm-project/pull/105383

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][-Wunsafe-buffer-usage] Add a new option for unsafe buffer warnings on libc functions (PR #105383)

2024-08-28 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 edited 
https://github.com/llvm/llvm-project/pull/105383
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-28 Thread Ziqing Luo via cfe-commits


@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer 
pointer and size may not match"
+  "| use 'std::string::c_str' or string literal as string pointer to 
guarantee null-termination"

ziqingluo-90 wrote:

> Have we tried getting data from a real project?

Yes! Basically these special handlings are all corresponding to actual cases in 
our downstream projects.

>I think for this warning in particular, it's valuable to point the user to the 
>specific %s argument. To, at least, make sure that they know we don't mean the 
>snprintf's target string or something.

I will let the notes point to specific argument source locations.  For better 
note messages, we could do it in a follow-up patch?

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-28 Thread Ziqing Luo via cfe-commits


@@ -443,6 +449,396 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) {
+  return Node.getNumArgs() == Num;
+}
+
+namespace libc_func_matchers {
+// Under `libc_func_matchers`, define a set of matchers that match unsafe
+// functions in libc and unsafe calls to them.
+
+//  A tiny parser to strip off common prefix and suffix of libc function names
+//  in real code.
+//
+//  Given a function name, `matchName` returns `CoreName` according to the
+//  following grammar:
+//
+//  LibcName := CoreName | CoreName + "_s"
+//  MatchingName := "__builtin_" + LibcName  |
+//  "__builtin___" + LibcName + "_chk"   |
+//  "__asan_" + LibcName
+//
+struct LibcFunNamePrefixSuffixParser {
+  StringRef matchName(StringRef FunName, bool isBuiltin) {
+// Try to match __builtin_:
+if (isBuiltin && FunName.starts_with("__builtin_"))
+  // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+  // no match:
+  return matchLibcNameOrBuiltinChk(
+  FunName.drop_front(10 /* truncate "__builtin_" */));
+// Try to match __asan_:
+if (FunName.starts_with("__asan_"))
+  return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */));
+return matchLibcName(FunName);
+  }
+
+  // Parameter `Name` is the substring after stripping off the prefix
+  // "__builtin_".
+  StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
+if (Name.starts_with("__") && Name.ends_with("_chk"))
+  return matchLibcName(
+  Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+return matchLibcName(Name);
+  }
+
+  StringRef matchLibcName(StringRef Name) {
+if (Name.ends_with("_s"))
+  return Name.drop_back(2 /* truncate "_s" */);
+return Name;
+  }
+};
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) {
+const CXXMethodDecl *MD = MCE->getMethodDecl();
+const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+if (MD && RD && RD->isInStdNamespace())
+  if (MD->getName() == "c_str" && RD->getName() == "basic_string")
+return true;
+  }
+  return false;
+}
+
+// Return true iff at least one of following cases holds:
+//  1. Format string is a literal and there is an unsafe pointer argument
+// corresponding to an `s` specifier;
+//  2. Format string is not a literal and there is least an unsafe pointer
+// argument (including the formatter argument).
+static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx,
+  ASTContext &Ctx, bool isKprintf = false) {
+  class StringFormatStringHandler
+  : public analyze_format_string::FormatStringHandler {
+const CallExpr *Call;
+unsigned FmtArgIdx;
+
+  public:
+StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx)
+: Call(Call), FmtArgIdx(FmtArgIdx) {}
+
+bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+   const char *startSpecifier,
+   unsigned specifierLen,
+   const TargetInfo &Target) override {
+  if (FS.getConversionSpecifier().getKind() ==
+  analyze_printf::PrintfConversionSpecifier::sArg) {
+unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+
+if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
+  if (!isNullTermPointer(Call->getArg(ArgIdx)))
+return false; // stop parsing
+  }
+  return true; // continue parsing
+}
+  };
+
+  const Expr *Fmt = Call->getArg(FmtArgIdx);
+
+  if (auto *SL = dyn_cast(Fmt->IgnoreParenImpCasts())) {
+StringRef FmtStr = SL->getString();
+StringFormatStringHandler Handler(Call, FmtArgIdx);
+
+return analyze_format_string::ParsePrintfString(
+Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
+Ctx.getTargetInfo(), isKprintf);
+  }
+  // If format is not a string literal, we cannot analyze the format string.
+  // In this case, this call is considered unsafe if at least one argument
+  // (including the format argument) is unsafe pointer.
+  return llvm::any_of(
+  llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+  [](const Expr *Arg) {
+return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
+  });
+}
+
+// Matches a FunctionDecl node such that
+//  1. It's name, after stripping off predefined prefix and suffix, is
+// `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+// is a set of lib

[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-28 Thread Ziqing Luo via cfe-commits


@@ -1025,6 +1421,92 @@ class DataInvocationGadget : public WarningGadget {
   DeclUseList getClaimedVarUseSites() const override { return {}; }
 };
 
+class UnsafeLibcFunctionCallGadget : public WarningGadget {
+  const CallExpr *const Call;
+  constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
+  // Extra tags for additional information:
+  constexpr static const char *const UnsafeSprintfTag =
+  "UnsafeLibcFunctionCall_sprintf";
+  constexpr static const char *const UnsafeSizedByTag =
+  "UnsafeLibcFunctionCall_sized_by";
+  constexpr static const char *const UnsafeStringTag =
+  "UnsafeLibcFunctionCall_string";
+  constexpr static const char *const UnsafeVaListTag =
+  "UnsafeLibcFunctionCall_va_list";
+
+  enum UnsafeKind {
+OTHERS = 0,   // no specific information, the callee function is unsafe
+SPRINTF = 1,  // never call `-sprintf`s, call `-snprintf`s instead.
+SIZED_BY = 2, // a pair of function arguments have "__sized_by" relation 
but
+  // they do not conform to safe patterns
+STRING = 3,   // an argument is a pointer-to-char-as-string but does not
+  // guarantee null-termination
+VA_LIST = 4,  // one of the `-printf`s function that take va_list, which is
+  // considered unsafe as it is not compile-time check
+  } WarnedFunKind = OTHERS;
+
+public:
+  UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result)
+  : WarningGadget(Kind::UnsafeLibcFunctionCall),
+Call(Result.Nodes.getNodeAs(Tag)) {
+if (Result.Nodes.getNodeAs(UnsafeSprintfTag))
+  WarnedFunKind = SPRINTF;
+else if (Result.Nodes.getNodeAs(UnsafeStringTag))
+  WarnedFunKind = STRING;
+else if (Result.Nodes.getNodeAs(UnsafeSizedByTag))
+  WarnedFunKind = SIZED_BY;
+else if (Result.Nodes.getNodeAs(UnsafeVaListTag))
+  WarnedFunKind = VA_LIST;
+  }
+
+  static Matcher matcher() {
+return stmt(
+stmt(
+anyOf(
+// Match a call to a predefined unsafe libc function (unless 
the
+// call has a sole string literal argument):
+callExpr(callee(functionDecl(
+ 
libc_func_matchers::isPredefinedUnsafeLibcFunc())),
+ unless(allOf(hasArgument(0, expr(stringLiteral())),
+  hasNumArgs(1,
+// Match a call to one of the `v*printf` functions taking
+// va-list, which cannot be checked at compile-time:
+callExpr(callee(functionDecl(
+ libc_func_matchers::isUnsafeVaListPrintfFunc(
+.bind(UnsafeVaListTag),

ziqingluo-90 wrote:

right, that's a good point!

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-21 Thread Ziqing Luo via cfe-commits


@@ -0,0 +1,101 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN:-verify %s
+
+typedef struct {} FILE;
+void memcpy();
+void __asan_memcpy();
+void strcpy();
+void strcpy_s();
+void wcscpy_s();
+unsigned strlen( const char* str );
+int fprintf( FILE* stream, const char* format, ... );
+int printf( const char* format, ... );
+int sprintf( char* buffer, const char* format, ... );
+int swprintf( char* buffer, const char* format, ... );
+int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... );
+int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int sscanf_s(const char * buffer, const char * format, ...);
+int sscanf(const char * buffer, const char * format, ... );
+
+namespace std {
+  template< class InputIt, class OutputIt >
+  OutputIt copy( InputIt first, InputIt last,
+OutputIt d_first );
+
+  struct iterator{};
+  template
+  struct span {
+T * ptr;
+T * data();
+unsigned size_bytes();
+unsigned size();
+iterator begin() const noexcept;
+iterator end() const noexcept;
+  };
+
+  template
+  struct basic_string {
+T* p;
+T *c_str();
+T *data();
+unsigned size_bytes();
+  };
+
+  typedef basic_string string;
+  typedef basic_string wstring;
+
+  // C function under std:
+  void memcpy();
+  void strcpy();
+}
+
+void f(char * p, char * q, std::span s, std::span s2) {
+  memcpy();   // expected-warning{{function 'memcpy' 
introduces unsafe buffer access}}
+  std::memcpy();  // expected-warning{{function 'memcpy' 
introduces unsafe buffer access}}
+  __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' 
introduces unsafe buffer access}}
+  __builtin___memcpy_chk(p, q, 8, 64);  // expected-warning{{function 
'__builtin___memcpy_chk' introduces unsafe buffer access}}
+  __asan_memcpy();  // expected-warning{{function 
'__asan_memcpy' introduces unsafe buffer access}}
+  strcpy();   // expected-warning{{function 'strcpy' 
introduces unsafe buffer access}}
+  std::strcpy();  // expected-warning{{function 'strcpy' 
introduces unsafe buffer access}}
+  strcpy_s(); // expected-warning{{function 'strcpy_s' 
introduces unsafe buffer access}}
+  wcscpy_s(); // expected-warning{{function 'wcscpy_s' 
introduces unsafe buffer access}}
+
+
+  /* Test printfs */
+  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function 'fprintf' 
introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or 
string literal as string pointer to guarantee null-termination}}
+  printf("%s%d", p, *p);  // expected-warning{{function 'printf' introduces 
unsafe buffer access}} expected-note{{use 'std::string::c_str' or string 
literal as string pointer to guarantee null-termination}}
+  sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' 
introduces unsafe buffer access}} expected-note{{change to 'snprintf' for 
explicit bounds checking}}
+  swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' 
introduces unsafe buffer access}} expected-note{{change to 'snprintf' for 
explicit bounds checking}}
+  snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 
'snprintf' introduces unsafe buffer access}} expected-note{{buffer pointer and 
size may not match}}
+  snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // 
expected-warning{{function 'snprintf' introduces unsafe buffer access}} 
expected-note{{buffer pointer and size may not match}}
+  snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // 
expected-warning{{function 'snwprintf' introduces unsafe buffer access}} 
expected-note{{buffer pointer and size may not match}}
+  snwprintf_s(s.data(), s2.size(), "%s%d", "hello", *p); // 
expected-warning{{function 'snwprintf_s' introduces unsafe buffer access}} 
expected-note{{buffer pointer and size may not match}}
+  vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // 
expected-warning{{function 'vsnprintf' introduces unsafe buffer access}} 
expected-note{{do not use va_list that cannot be checked at compile-time for 
bounds safety}}

ziqingluo-90 wrote:

change to "... va_list, which cannot ..."

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-21 Thread Ziqing Luo via cfe-commits


@@ -0,0 +1,101 @@
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN:-verify %s
+
+typedef struct {} FILE;
+void memcpy();
+void __asan_memcpy();
+void strcpy();
+void strcpy_s();
+void wcscpy_s();
+unsigned strlen( const char* str );
+int fprintf( FILE* stream, const char* format, ... );
+int printf( const char* format, ... );
+int sprintf( char* buffer, const char* format, ... );
+int swprintf( char* buffer, const char* format, ... );
+int snprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... );
+int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... );
+int sscanf_s(const char * buffer, const char * format, ...);
+int sscanf(const char * buffer, const char * format, ... );
+
+namespace std {
+  template< class InputIt, class OutputIt >
+  OutputIt copy( InputIt first, InputIt last,
+OutputIt d_first );
+
+  struct iterator{};
+  template
+  struct span {
+T * ptr;
+T * data();
+unsigned size_bytes();
+unsigned size();
+iterator begin() const noexcept;
+iterator end() const noexcept;
+  };
+
+  template
+  struct basic_string {
+T* p;
+T *c_str();
+T *data();
+unsigned size_bytes();
+  };
+
+  typedef basic_string string;
+  typedef basic_string wstring;
+
+  // C function under std:
+  void memcpy();
+  void strcpy();
+}
+
+void f(char * p, char * q, std::span s, std::span s2) {
+  memcpy();   // expected-warning{{function 'memcpy' 
introduces unsafe buffer access}}
+  std::memcpy();  // expected-warning{{function 'memcpy' 
introduces unsafe buffer access}}
+  __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' 
introduces unsafe buffer access}}
+  __builtin___memcpy_chk(p, q, 8, 64);  // expected-warning{{function 
'__builtin___memcpy_chk' introduces unsafe buffer access}}
+  __asan_memcpy();  // expected-warning{{function 
'__asan_memcpy' introduces unsafe buffer access}}
+  strcpy();   // expected-warning{{function 'strcpy' 
introduces unsafe buffer access}}
+  std::strcpy();  // expected-warning{{function 'strcpy' 
introduces unsafe buffer access}}
+  strcpy_s(); // expected-warning{{function 'strcpy_s' 
introduces unsafe buffer access}}
+  wcscpy_s(); // expected-warning{{function 'wcscpy_s' 
introduces unsafe buffer access}}
+
+
+  /* Test printfs */
+  fprintf((FILE*)p, "%s%d", p, *p);  // expected-warning{{function 'fprintf' 
introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or 
string literal as string pointer to guarantee null-termination}}
+  printf("%s%d", p, *p);  // expected-warning{{function 'printf' introduces 
unsafe buffer access}} expected-note{{use 'std::string::c_str' or string 
literal as string pointer to guarantee null-termination}}

ziqingluo-90 wrote:

change to "... or a string literal .."

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Don't Merge][-Wunsafe-buffer-usage] Add a new option for unsafe buffer warnings on libc functions (PR #105383)

2024-08-20 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 created 
https://github.com/llvm/llvm-project/pull/105383

Depending on https://github.com/llvm/llvm-project/pull/101583

(rdar://117182250)

>From b0970a849a21180315ace77e40f3db95fddac346 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/5] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |  12 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   7 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 408 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  12 +
 ...-usage-libc-functions-inline-namespace.cpp |  60 +++
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  86 
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 8 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e115..3e0fae6db7562d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo
+  ///  is 0 if the callee function is not a member of the printf family;
+  ///  is 1 if the callee is `sprintf`;
+  ///  is 2 if arguments of the call have `__size_by` relation but are not in a
+  ///  safe pattern;
+  ///  is 3 if string arguments do not guarantee null-termination
+  ///  is 4 if the callee takes va_list
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b9..ac01b285ae833b 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 581434d33c5c9a..0396da735e0458 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12376,6 +12376,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer 
pointer and size may not match"
+  "| use 'std::string::c_str' or string literal as string pointer to 
guarantee null-termination"
+  "| do not use va_list that cannot be checked at compile-time for 
bounds safety}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..751fb75f6ed602 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,24 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #inclu

[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |  12 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   7 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 408 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  12 +
 ...-usage-libc-functions-inline-namespace.cpp |  60 +++
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  86 
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 8 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e115..3e0fae6db7562d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo
+  ///  is 0 if the callee function is not a member of the printf family;
+  ///  is 1 if the callee is `sprintf`;
+  ///  is 2 if arguments of the call have `__size_by` relation but are not in a
+  ///  safe pattern;
+  ///  is 3 if string arguments do not guarantee null-termination
+  ///  is 4 if the callee takes va_list
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b9..ac01b285ae833b 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..7e1e3686ce6554 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer 
pointer and size may not match"
+  "| use 'std::string::c_str' or string literal as string pointer to 
guarantee null-termination"
+  "| do not use va_list that cannot be checked at compile-time for 
bounds safety}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..751fb75f6ed602 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,24 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclC

[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits


@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets.   They all take a `CoreName`, which is the substring of 
a
+// function name after `ignoreLibcPrefixAndSuffix`.  They are suppose to be 
used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+//  1. It's name, after stripping off predefined prefix and suffix, is
+// `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+// is a set of libc function names.
+//
+//  Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+//  And, the notation `CoreName[str/wcs]` means a new name obtained from 
replace
+//  string "wcs" with "str" in `CoreName`.
+//
+//  Also note, the set of predefined function names does not include `printf`
+//  functions, they are checked exclusively with other matchers below.
+//  Maintaining the invariant that all matchers under
+//  `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+  // This is safe: strlen("hello").  We don't want to be noisy on this case.
+  auto isSafeStrlen = [&Node](StringRef Name) -> bool {
+return Name == "strlen" && Node.getNumArgs() == 1 &&
+   isa(Node.getArg(0)->IgnoreParenImpCasts());
+  };
+
+  // Match predefined names:
+  if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
+return !isSafeStrlen(CoreName);
+
+  std::string NameWCS = CoreName.str();
+  size_t WcsPos = NameWCS.find("wcs");
+
+  while (WcsPos != std::string::npos) {
+NameWCS[WcsPos++] = 's';
+NameWCS[WcsPos++] = 't';
+NameWCS[WcsPos++] = 'r';
+WcsPos = NameWCS.find("wcs", WcsPos);
+  }
+  if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+return !isSafeStrlen(NameWCS);
+  // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. 
They
+  // all should end with "scanf"):
+  return CoreName.ends_with("scanf");
+}
+
+// Match a call to one of the `-printf` functions taking `va_list`.  We cannot
+// check safety for these functions so they should be changed to their
+// non-va_list versions.
+AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf"))
+return false; // neither printf nor scanf
+  return Name.starts_with("v");
+}
+
+// Matches a call to one of the `-sprintf` functions (excluding the ones with
+// va_list) as they are always unsafe and should be changed to corresponding
+// `-snprintf`s.
+AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+Prefix = Prefix.drop_back(1);
+  return Prefix == "s";
+}
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) {
+const CXXMethodDecl *MD = MCE->getMethodDecl();
+const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+if (MD && RD && RD->isInStdNamespace(

[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits


@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets.   They all take a `CoreName`, which is the substring of 
a
+// function name after `ignoreLibcPrefixAndSuffix`.  They are suppose to be 
used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+//  1. It's name, after stripping off predefined prefix and suffix, is
+// `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+// is a set of libc function names.
+//
+//  Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+//  And, the notation `CoreName[str/wcs]` means a new name obtained from 
replace
+//  string "wcs" with "str" in `CoreName`.
+//
+//  Also note, the set of predefined function names does not include `printf`
+//  functions, they are checked exclusively with other matchers below.
+//  Maintaining the invariant that all matchers under
+//  `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+  // This is safe: strlen("hello").  We don't want to be noisy on this case.
+  auto isSafeStrlen = [&Node](StringRef Name) -> bool {
+return Name == "strlen" && Node.getNumArgs() == 1 &&
+   isa(Node.getArg(0)->IgnoreParenImpCasts());
+  };
+
+  // Match predefined names:
+  if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
+return !isSafeStrlen(CoreName);
+
+  std::string NameWCS = CoreName.str();
+  size_t WcsPos = NameWCS.find("wcs");
+
+  while (WcsPos != std::string::npos) {
+NameWCS[WcsPos++] = 's';
+NameWCS[WcsPos++] = 't';
+NameWCS[WcsPos++] = 'r';
+WcsPos = NameWCS.find("wcs", WcsPos);
+  }
+  if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+return !isSafeStrlen(NameWCS);
+  // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. 
They
+  // all should end with "scanf"):
+  return CoreName.ends_with("scanf");
+}
+
+// Match a call to one of the `-printf` functions taking `va_list`.  We cannot
+// check safety for these functions so they should be changed to their
+// non-va_list versions.
+AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf"))
+return false; // neither printf nor scanf
+  return Name.starts_with("v");
+}
+
+// Matches a call to one of the `-sprintf` functions (excluding the ones with
+// va_list) as they are always unsafe and should be changed to corresponding
+// `-snprintf`s.
+AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+Prefix = Prefix.drop_back(1);
+  return Prefix == "s";
+}
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) {
+const CXXMethodDecl *MD = MCE->getMethodDecl();
+const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+if (MD && RD && RD->isInStdNamespace(

[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits


@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets.   They all take a `CoreName`, which is the substring of 
a
+// function name after `ignoreLibcPrefixAndSuffix`.  They are suppose to be 
used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+//  1. It's name, after stripping off predefined prefix and suffix, is
+// `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+// is a set of libc function names.
+//
+//  Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+//  And, the notation `CoreName[str/wcs]` means a new name obtained from 
replace
+//  string "wcs" with "str" in `CoreName`.
+//
+//  Also note, the set of predefined function names does not include `printf`
+//  functions, they are checked exclusively with other matchers below.
+//  Maintaining the invariant that all matchers under
+//  `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+  // This is safe: strlen("hello").  We don't want to be noisy on this case.
+  auto isSafeStrlen = [&Node](StringRef Name) -> bool {
+return Name == "strlen" && Node.getNumArgs() == 1 &&
+   isa(Node.getArg(0)->IgnoreParenImpCasts());
+  };
+
+  // Match predefined names:
+  if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())

ziqingluo-90 wrote:

Done

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits


@@ -483,6 +483,34 @@ bool 
clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
   return false;
 }
 
+unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex(
+const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO,
+const TargetInfo &Target) {
+  unsigned argIndex = ArgIndex;
+
+  // Keep looking for a %s format specifier until we have exhausted the string.
+  FormatStringHandler H;
+  while (I != E) {
+const PrintfSpecifierResult &FSR =
+ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false);
+// Did a fail-stop error of any kind occur when parsing the specifier?
+// If so, don't do any more processing.
+if (FSR.shouldStop())

ziqingluo-90 wrote:

Done

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits


@@ -783,6 +783,18 @@ bool ParsePrintfString(FormatStringHandler &H,
 bool ParseFormatStringHasSArg(const char *beg, const char *end,
   const LangOptions &LO, const TargetInfo &Target);
 
+/// Parse C format string and return index (relative to `ArgIndex`) of the 
first
+/// found `s` specifier.  Return 0 if not found.
+/// \param I The start of the C format string; Updated to the first unparsed
+/// position upon return.
+/// \param E The end of the C format string;
+/// \param ArgIndex The argument index of the last found `s` specifier; Or the
+/// argument index of the formatter in initial case.
+unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E,

ziqingluo-90 wrote:

Done

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits


@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets.   They all take a `CoreName`, which is the substring of 
a
+// function name after `ignoreLibcPrefixAndSuffix`.  They are suppose to be 
used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+//  1. It's name, after stripping off predefined prefix and suffix, is
+// `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+// is a set of libc function names.
+//
+//  Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+//  And, the notation `CoreName[str/wcs]` means a new name obtained from 
replace
+//  string "wcs" with "str" in `CoreName`.
+//
+//  Also note, the set of predefined function names does not include `printf`
+//  functions, they are checked exclusively with other matchers below.
+//  Maintaining the invariant that all matchers under
+//  `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+  static const std::set PredefinedNames{

ziqingluo-90 wrote:

Done

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits


@@ -2292,6 +2292,18 @@ class UnsafeBufferUsageReporter : public 
UnsafeBufferUsageHandler {
 }
   }
 
+  void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) override {
+// We have checked that there is a direct callee with an identifier name:
+StringRef Name = Call->getDirectCallee()->getName();
+
+S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call)
+<< Name << Call->getSourceRange();

ziqingluo-90 wrote:

Done

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |  12 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   7 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 408 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  12 +
 ...-usage-libc-functions-inline-namespace.cpp |  60 +++
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  86 
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 8 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e115..3e0fae6db7562d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo
+  ///  is 0 if the callee function is not a member of the printf family;
+  ///  is 1 if the callee is `sprintf`;
+  ///  is 2 if arguments of the call have `__size_by` relation but are not in a
+  ///  safe pattern;
+  ///  is 3 if string arguments do not guarantee null-termination
+  ///  is 4 if the callee takes va_list
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b9..ac01b285ae833b 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..7e1e3686ce6554 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer 
pointer and size may not match"
+  "| use 'std::string::c_str' or string literal as string pointer to 
guarantee null-termination"
+  "| do not use va_list that cannot be checked at compile-time for 
bounds safety}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..751fb75f6ed602 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,24 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclC

[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits


@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets.   They all take a `CoreName`, which is the substring of 
a
+// function name after `ignoreLibcPrefixAndSuffix`.  They are suppose to be 
used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+//  1. It's name, after stripping off predefined prefix and suffix, is
+// `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+// is a set of libc function names.
+//
+//  Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+//  And, the notation `CoreName[str/wcs]` means a new name obtained from 
replace
+//  string "wcs" with "str" in `CoreName`.
+//
+//  Also note, the set of predefined function names does not include `printf`
+//  functions, they are checked exclusively with other matchers below.
+//  Maintaining the invariant that all matchers under
+//  `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+  // This is safe: strlen("hello").  We don't want to be noisy on this case.
+  auto isSafeStrlen = [&Node](StringRef Name) -> bool {
+return Name == "strlen" && Node.getNumArgs() == 1 &&
+   isa(Node.getArg(0)->IgnoreParenImpCasts());
+  };
+
+  // Match predefined names:
+  if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
+return !isSafeStrlen(CoreName);
+
+  std::string NameWCS = CoreName.str();
+  size_t WcsPos = NameWCS.find("wcs");
+
+  while (WcsPos != std::string::npos) {
+NameWCS[WcsPos++] = 's';
+NameWCS[WcsPos++] = 't';
+NameWCS[WcsPos++] = 'r';
+WcsPos = NameWCS.find("wcs", WcsPos);
+  }
+  if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+return !isSafeStrlen(NameWCS);
+  // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. 
They
+  // all should end with "scanf"):
+  return CoreName.ends_with("scanf");
+}
+
+// Match a call to one of the `-printf` functions taking `va_list`.  We cannot
+// check safety for these functions so they should be changed to their
+// non-va_list versions.
+AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf"))
+return false; // neither printf nor scanf
+  return Name.starts_with("v");
+}
+
+// Matches a call to one of the `-sprintf` functions (excluding the ones with
+// va_list) as they are always unsafe and should be changed to corresponding
+// `-snprintf`s.
+AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+Prefix = Prefix.drop_back(1);
+  return Prefix == "s";
+}
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) {
+const CXXMethodDecl *MD = MCE->getMethodDecl();
+const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+if (MD && RD && RD->isInStdNamespace(

[clang] [-Wunsafe-buffer-usage] Warning Libc functions (PR #101583)

2024-08-15 Thread Ziqing Luo via cfe-commits


@@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+namespace libc_fun_disjoint_inner_matchers {
+// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match
+// disjoint node sets.   They all take a `CoreName`, which is the substring of 
a
+// function name after `ignoreLibcPrefixAndSuffix`.  They are suppose to be 
used
+// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different
+// libc function calls.
+
+// Matches a function call node such that
+//  1. It's name, after stripping off predefined prefix and suffix, is
+// `CoreName`; and
+//  2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
+// is a set of libc function names.
+//
+//  Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`.
+//  And, the notation `CoreName[str/wcs]` means a new name obtained from 
replace
+//  string "wcs" with "str" in `CoreName`.
+//
+//  Also note, the set of predefined function names does not include `printf`
+//  functions, they are checked exclusively with other matchers below.
+//  Maintaining the invariant that all matchers under
+//  `libc_fun_disjoint_inner_matchers` are disjoint.
+AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+  // This is safe: strlen("hello").  We don't want to be noisy on this case.
+  auto isSafeStrlen = [&Node](StringRef Name) -> bool {
+return Name == "strlen" && Node.getNumArgs() == 1 &&
+   isa(Node.getArg(0)->IgnoreParenImpCasts());
+  };
+
+  // Match predefined names:
+  if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end())
+return !isSafeStrlen(CoreName);
+
+  std::string NameWCS = CoreName.str();
+  size_t WcsPos = NameWCS.find("wcs");
+
+  while (WcsPos != std::string::npos) {
+NameWCS[WcsPos++] = 's';
+NameWCS[WcsPos++] = 't';
+NameWCS[WcsPos++] = 'r';
+WcsPos = NameWCS.find("wcs", WcsPos);
+  }
+  if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+return !isSafeStrlen(NameWCS);
+  // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. 
They
+  // all should end with "scanf"):
+  return CoreName.ends_with("scanf");
+}
+
+// Match a call to one of the `-printf` functions taking `va_list`.  We cannot
+// check safety for these functions so they should be changed to their
+// non-va_list versions.
+AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf"))
+return false; // neither printf nor scanf
+  return Name.starts_with("v");
+}
+
+// Matches a call to one of the `-sprintf` functions (excluding the ones with
+// va_list) as they are always unsafe and should be changed to corresponding
+// `-snprintf`s.
+AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) {
+  StringRef Name = CoreName;
+
+  if (!Name.ends_with("printf") || Name.starts_with("v"))
+return false;
+
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+Prefix = Prefix.drop_back(1);
+  return Prefix == "s";
+}
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (isa(Ptr->IgnoreParenImpCasts()))
+return true;
+  if (auto *MCE = dyn_cast(Ptr->IgnoreParenImpCasts())) {
+const CXXMethodDecl *MD = MCE->getMethodDecl();
+const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
+
+if (MD && RD && RD->isInStdNamespace(

[clang] [-Wunsafe-buffer-usage] Fix a small bug recently found (PR #102953)

2024-08-15 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 closed 
https://github.com/llvm/llvm-project/pull/102953
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Safe Buffers] Fix a small bug recently found (PR #102953)

2024-08-13 Thread Ziqing Luo via cfe-commits


@@ -404,7 +404,7 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
 
   if (Arg0Ty->isConstantArrayType()) {
 const APSInt ConstArrSize =
-APSInt(cast(Arg0Ty)->getSize());
+APSInt(cast(Arg0Ty.getCanonicalType())->getSize());

ziqingluo-90 wrote:

Have changed to use `ASTContext::getAsConstantArrayType()`

https://github.com/llvm/llvm-project/pull/102953
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Safe Buffers] Fix a small bug recently found (PR #102953)

2024-08-13 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/102953

>From d6c860de3facc37f27b17a26a01e48bc02b4659b Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 12 Aug 2024 11:57:17 -0700
Subject: [PATCH] [-Wunsafe-buffer-usage] Fix a bug in the ASTMatcher for span
 constructors

`QualType::isConstantArrayType()` checks canonical type. So a
following cast should be applied to canonical type as well. And, a
better option is to use `ASTContext::getAsArrayType()`.

```
if (Ty->isConstantArrayType())
  cast(Ty); // this is incorrect

if (auto *CAT = Context.getAsConstantArrayType(Ty))
  ...  // this is good

```
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp   | 14 +++---
 ...fe-buffer-usage-in-container-span-construct.cpp |  3 +++
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..db7ec5741f91c4 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -402,9 +402,9 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
 
   QualType Arg0Ty = Arg0->IgnoreImplicit()->getType();
 
-  if (Arg0Ty->isConstantArrayType()) {
-const APSInt ConstArrSize =
-APSInt(cast(Arg0Ty)->getSize());
+  if (auto *ConstArrTy =
+  Finder->getASTContext().getAsConstantArrayType(Arg0Ty)) {
+const APSInt ConstArrSize = APSInt(ConstArrTy->getSize());
 
 // Check form 4:
 return Arg1CV && APSInt::compareValues(ConstArrSize, *Arg1CV) == 0;
@@ -2659,7 +2659,7 @@ static FixItList fixVarDeclWithArray(const VarDecl *D, 
const ASTContext &Ctx,
 
   // Note: the code below expects the declaration to not use any type sugar 
like
   // typedef.
-  if (auto CAT = dyn_cast(D->getType())) {
+  if (auto CAT = Ctx.getAsConstantArrayType(D->getType())) {
 const QualType &ArrayEltT = CAT->getElementType();
 assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
 // FIXME: support multi-dimensional arrays
@@ -2792,9 +2792,9 @@ fixVariable(const VarDecl *VD, FixitStrategy::Kind K,
 return {};
   }
   case FixitStrategy::Kind::Array: {
-if (VD->isLocalVarDecl() &&
-isa(VD->getType().getCanonicalType()))
-  return fixVariableWithArray(VD, Tracker, Ctx, Handler);
+if (VD->isLocalVarDecl())
+  if (auto CAT = Ctx.getAsConstantArrayType(VD->getType()))
+return fixVariableWithArray(VD, Tracker, Ctx, Handler);
 
 DEBUG_NOTE_DECL_FAIL(VD, " : not a local const-size array");
 return {};
diff --git 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp
index a1ddc384e0d9b8..e97511593bbd81 100644
--- 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp
+++ 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp
@@ -79,6 +79,8 @@ namespace construct_wt_ptr_size {
 unsigned Y = 10;
 std::span S = std::span{&X, 1}; // no-warning
 int Arr[10];
+typedef int TenInts_t[10];
+TenInts_t Arr2;
 
 S = std::span{&X, 2};// expected-warning{{the 
two-parameter std::span construction is unsafe as it can introduce mismatch 
between buffer size and the bound information}}
 S = std::span{new int[10], 10};  // no-warning
@@ -90,6 +92,7 @@ namespace construct_wt_ptr_size {
 S = std::span{new int[10], 9};   // expected-warning{{the 
two-parameter std::span construction is unsafe as it can introduce mismatch 
between buffer size and the bound information}}  // not smart enough to tell 
its safe
 S = std::span{new int[10], Y};   // expected-warning{{the 
two-parameter std::span construction is unsafe as it can introduce mismatch 
between buffer size and the bound information}}  // not smart enough to tell 
its safe
 S = std::span{Arr, 10};  // no-warning
+S = std::span{Arr2, 10}; // no-warning
 S = std::span{Arr, Y};   // expected-warning{{the 
two-parameter std::span construction is unsafe as it can introduce mismatch 
between buffer size and the bound information}}  // not smart enough to tell 
its safe
 S = std::span{p, 0}; // no-warning
   }

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Warning Libc functions (PR #101583)

2024-08-12 Thread Ziqing Luo via cfe-commits


@@ -443,6 +447,314 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+
+  // A tiny name parser for unsafe libc function names with additional
+  // checks for `printf`s:
+  struct FuncNameMatch {
+const CallExpr *const Call;
+ASTContext &Ctx;
+enum ResultKind {
+  NO,   // no match
+  YES,  // matched a name that is not a member of the printf family
+  SPRINTF,  // matched `sprintf`
+  OTHER_PRINTF, // matched a printf function that is not `sprintf`
+};
+FuncNameMatch(const CallExpr *Call, ASTContext &Ctx)
+: Call(Call), Ctx(Ctx) {}
+
+// For a name `S` in `PredefinedNames` or a member of the printf/scanf
+// family, define matching function names with `S` by the grammar below:
+//
+//  CoreName := S | S["wcs"/"str"]
+//  LibcName := CoreName | CoreName + "_s"
+//  MatchingName := "__builtin_" + LibcName  |
+//  "__builtin___" + LibcName + "_chk"   |
+//  "__asan_" + LibcName
+//
+// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
+ResultKind matchName(StringRef FunName, bool isBuiltin) {
+  // Try to match __builtin_:
+  if (isBuiltin && FunName.starts_with("__builtin_"))
+// Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+// no match:
+return matchLibcNameOrBuiltinChk(
+FunName.drop_front(10 /* truncate "__builtin_" */));
+  // Try to match __asan_:
+  if (FunName.starts_with("__asan_"))
+return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" 
*/));
+  return matchLibcName(FunName);
+}
+
+// Parameter `Name` is the substring after stripping off the prefix
+// "__builtin_".
+ResultKind matchLibcNameOrBuiltinChk(StringRef Name) {
+  if (Name.starts_with("__") && Name.ends_with("_chk"))
+return matchLibcName(
+Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+  return matchLibcName(Name);
+}
+
+ResultKind matchLibcName(StringRef Name) {
+  if (Name.ends_with("_s"))
+return matchCoreName(Name.drop_back(2 /* truncate "_s" */));
+  return matchCoreName(Name);
+}
+
+ResultKind matchCoreName(StringRef Name) {
+  if (PredefinedNames.find(Name.str()) != PredefinedNames.end())
+return !isSafeStrlen(Name) ? YES
+   : NO; // match unless it's a safe strlen 
call
+
+  std::string NameWCS = Name.str();
+  size_t WcsPos = NameWCS.find("wcs");
+
+  while (WcsPos != std::string::npos) {
+NameWCS[WcsPos++] = 's';
+NameWCS[WcsPos++] = 't';
+NameWCS[WcsPos++] = 'r';
+WcsPos = NameWCS.find("wcs", WcsPos);
+  }
+  if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+return !isSafeStrlen(NameWCS) ? YES : NO;
+  return matchPrintfOrScanfFamily(Name);
+}
+
+ResultKind matchPrintfOrScanfFamily(StringRef Name) {
+  if (Name.ends_with("scanf"))
+return YES; // simply warn about scanf functions
+  if (!Name.ends_with("printf"))
+return NO; // neither printf nor scanf
+  if (Name.starts_with("v"))
+// cannot check args for va_list, so `vprintf`s are treated as regular
+// unsafe libc calls:
+return YES;
+
+  // Truncate "printf", focus on prefixes.  There are different possible
+  // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw".  We strip off 
the
+  // 'w' and handle printfs di

[clang] Warning Libc functions (PR #101583)

2024-08-12 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |  12 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   7 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 408 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  12 +
 ...-usage-libc-functions-inline-namespace.cpp |  60 +++
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  86 
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 8 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e115..3e0fae6db7562d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo
+  ///  is 0 if the callee function is not a member of the printf family;
+  ///  is 1 if the callee is `sprintf`;
+  ///  is 2 if arguments of the call have `__size_by` relation but are not in a
+  ///  safe pattern;
+  ///  is 3 if string arguments do not guarantee null-termination
+  ///  is 4 if the callee takes va_list
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b9..ac01b285ae833b 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..7e1e3686ce6554 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer 
pointer and size may not match"
+  "| use 'std::string::c_str' or string literal as string pointer to 
guarantee null-termination"
+  "| do not use va_list that cannot be checked at compile-time for 
bounds safety}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..751fb75f6ed602 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,24 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclC

[clang] Warning Libc functions (PR #101583)

2024-08-12 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From cce5781733a7c294f10dc75f48372ff6ee331239 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |  12 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   7 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 408 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  12 +
 ...-usage-libc-functions-inline-namespace.cpp |  60 +++
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  86 
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 8 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e115..3e0fae6db7562d 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo
+  ///  is 0 if the callee function is not a member of the printf family;
+  ///  is 1 if the callee is `sprintf`;
+  ///  is 2 if arguments of the call have `__size_by` relation but are not in a
+  ///  safe pattern;
+  ///  is 3 if string arguments do not guarantee null-termination
+  ///  is 4 if the callee takes va_list
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b9..ac01b285ae833b 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..7e1e3686ce6554 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12383,6 +12383,13 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | buffer 
pointer and size may not match"
+  "| use 'std::string::c_str' or string literal as string pointer to 
guarantee null-termination"
+  "| do not use va_list that cannot be checked at compile-time for 
bounds safety}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b6..751fb75f6ed602 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,24 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclC

[clang] [Safe Buffers] Fix a small bug recently found (PR #102953)

2024-08-12 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 created 
https://github.com/llvm/llvm-project/pull/102953

`QualType::isConstantArrayType()` checks canonical type. So a following cast 
should be applied to canonical type as well:

```
if (Ty->isConstantArrayType())
  cast(Ty.getCanonicalType());  //  
cast(Ty) is incorrect
```

>From a15f22e9577154783165bdfc1021e640bbc4dcd0 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 12 Aug 2024 11:57:17 -0700
Subject: [PATCH] [-Wunsafe-buffer-usage] Fix a bug in the ASTMatcher for span
 constructors

`QualType::isConstantArrayType()` checks canonical type. So a
following cast should be applied to canonical type as well:

```
if (Ty->isConstantArrayType())
  cast(Ty.getCanonicalType());  //  
cast(Ty) is incorrect
```
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp   | 2 +-
 .../warn-unsafe-buffer-usage-in-container-span-construct.cpp   | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b..379b3ea7adf9e 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -404,7 +404,7 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
 
   if (Arg0Ty->isConstantArrayType()) {
 const APSInt ConstArrSize =
-APSInt(cast(Arg0Ty)->getSize());
+APSInt(cast(Arg0Ty.getCanonicalType())->getSize());
 
 // Check form 4:
 return Arg1CV && APSInt::compareValues(ConstArrSize, *Arg1CV) == 0;
diff --git 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp
index a1ddc384e0d9b..f4f2a028f0b8f 100644
--- 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp
+++ 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp
@@ -79,6 +79,8 @@ namespace construct_wt_ptr_size {
 unsigned Y = 10;
 std::span S = std::span{&X, 1}; // no-warning
 int Arr[10];
+typedef int TenInts_t[10];
+TenInts_t Arr2;
 
 S = std::span{&X, 2};// expected-warning{{the 
two-parameter std::span construction is unsafe as it can introduce mismatch 
between buffer size and the bound information}}
 S = std::span{new int[10], 10};  // no-warning
@@ -90,6 +92,7 @@ namespace construct_wt_ptr_size {
 S = std::span{new int[10], 9};   // expected-warning{{the 
two-parameter std::span construction is unsafe as it can introduce mismatch 
between buffer size and the bound information}}  // not smart enough to tell 
its safe
 S = std::span{new int[10], Y};   // expected-warning{{the 
two-parameter std::span construction is unsafe as it can introduce mismatch 
between buffer size and the bound information}}  // not smart enough to tell 
its safe
 S = std::span{Arr, 10};  // no-warning
+S = std::span{Arr2, 10}; // no-warning
 S = std::span{Arr, Y};   // expected-warning{{the 
two-parameter std::span construction is unsafe as it can introduce mismatch 
between buffer size and the bound information}}  // not smart enough to tell 
its safe
 S = std::span{p, 0}; // no-warning
   }

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Warning Libc functions (PR #101583)

2024-08-05 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From 979619ea3ac0b52944468160f582d82fce3cee7d Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |   8 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   5 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 325 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  15 +
 ...-usage-libc-functions-inline-namespace.cpp |  60 
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  87 +
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 8 files changed, 501 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e11..62d0c6e350f37 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,13 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo is 0 if the callee function is not a member of the
+  /// printf family; is 1 if the callee is `sprintf`; is 2 if
+  /// the callee is other printfs
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b..ac01b285ae833 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 581434d33c5c9..e0fba7092dd6c 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12376,6 +12376,11 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def warn_unsafe_buffer_libc_call : Warning<
+  "function %0 introduces unsafe buffer access">,
+  InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | use 
'std::string::c_str' as pointer to guarantee null-termination}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b..2028b931d67ea 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,19 +9,22 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clan

[clang] Warning Libc functions (PR #101583)

2024-08-05 Thread Ziqing Luo via cfe-commits


@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+
+  // A tiny name parser for unsafe libc function names with additional
+  // checks for `printf`s:
+  struct FuncNameMatch {
+const CallExpr *const Call;
+FuncNameMatch(const CallExpr *Call) : Call(Call) {}
+
+// For a name `S` in `PredefinedNames` or a member of the printf/scanf
+// family, define matching function names with `S` by the grammar below:
+//
+//  CoreName := S | S["wcs"/"str"]
+//  LibcName := CoreName | CoreName + "_s"
+//  MatchingName := "__builtin_" + LibcName  |
+//  "__builtin___" + LibcName + "_chk"   |
+//  "__asan_" + LibcName
+//
+// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
+bool matchName(StringRef FunName) {
+  // Try to match __builtin_:
+  if (FunName.starts_with("__builtin_"))
+// Then either it is __builtin_LibcName or __builtin___LibcName_chk or
+// no match:
+return matchLibcNameOrBuiltinChk(
+FunName.drop_front(10 /* truncate "__builtin_" */));
+  // Try to match __asan_:
+  if (FunName.starts_with("__asan_"))
+return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" 
*/));
+  return matchLibcName(FunName);
+}
+
+// Parameter `Name` is the substring after stripping off the prefix
+// "__builtin_".
+bool matchLibcNameOrBuiltinChk(StringRef Name) {
+  if (Name.starts_with("__") && Name.ends_with("_chk"))
+return matchLibcName(
+Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */);
+  return matchLibcName(Name);
+}
+
+bool matchLibcName(StringRef Name) {
+  if (Name.ends_with("_s"))
+return matchCoreName(Name.drop_back(2 /* truncate "_s" */));
+  return matchCoreName(Name);
+}
+
+bool matchCoreName(StringRef Name) {
+  if (PredefinedNames.find(Name.str()) != PredefinedNames.end())
+return !isSafeStrlen(Name); // match unless it's a safe strlen call
+
+  std::string NameWCS = Name.str();
+  size_t WcsPos = NameWCS.find("wcs");
+
+  while (WcsPos != std::string::npos) {
+NameWCS[WcsPos++] = 's';
+NameWCS[WcsPos++] = 't';
+NameWCS[WcsPos++] = 'r';
+WcsPos = NameWCS.find("wcs", WcsPos);
+  }
+  if (PredefinedNames.find(NameWCS) != PredefinedNames.end())
+return !isSafeStrlen(NameWCS);
+  return matchPrintfOrScanfFamily(Name);
+}
+
+bool matchPrintfOrScanfFamily(StringRef Name) {
+  if (Name.ends_with("scanf"))
+return true; // simply warn about scanf functions
+  if (!Name.ends_with("printf"))
+return false; // neither printf nor scanf
+  if (Name.starts_with("v"))
+// cannot check args for va_list, so `vprintf`s are treated as regular
+// unsafe libc calls:
+return true;
+
+  // Truncate "printf", focus on prefixes.  There are different possible
+  // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw".  We strip off 
the
+  // 'w' and handle printfs differently by "k", "f", "s", "sn" or no 
prefix:
+  StringRef Prefix = Name.drop_back(6);
+
+  if (Prefix.ends_with("w"))
+Prefix = Prefix.drop_back(1);
+  return isUnsafePrintf(Prefix);
+}
+
+// A pointer type expression is known to be null-terminated, if it has the
+// form: E.c_str(), for any expression E of `std::string` type.
+static bool isNullTermPointer(const Expr *Ptr) {
+  if (i

[clang] Warning Libc functions (PR #101583)

2024-08-03 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From 8a8b317c2b1c73117bcbbf771a783338448724a5 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |   8 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   2 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 322 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  12 +
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  81 +
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 7 files changed, 425 insertions(+), 5 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e11..62d0c6e350f37 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,13 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo is 0 if the callee function is not a member of the
+  /// printf family; is 1 if the callee is `sprintf`; is 2 if
+  /// the callee is other printfs
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b..ac01b285ae833 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 581434d33c5c9..ce77f459fab4f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12376,6 +12376,8 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | use 
'std::string::c_str' as pointer to guarantee null-termination}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b..7402cf0f24c1e 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,19 +9,21 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
-#include "clang/AST/StmtVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
 #include 
 #include 
@@

[clang] Warning Libc functions (PR #101583)

2024-08-03 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From 8a8b317c2b1c73117bcbbf771a783338448724a5 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.

(rdar://117182250)
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |   8 +
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td|   2 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 322 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  12 +
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  81 +
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 7 files changed, 425 insertions(+), 5 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 228b4ae1e3e11..62d0c6e350f37 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
@@ -106,6 +107,13 @@ class UnsafeBufferUsageHandler {
   virtual void handleUnsafeOperation(const Stmt *Operation,
  bool IsRelatedToDecl, ASTContext &Ctx) = 
0;
 
+  /// Invoked when a call to an unsafe libc function is found.
+  /// \param PrintfInfo is 0 if the callee function is not a member of the
+  /// printf family; is 1 if the callee is `sprintf`; is 2 if
+  /// the callee is other printfs
+  virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo,
+ASTContext &Ctx) = 0;
+
   /// Invoked when an unsafe operation with a std container is found.
   virtual void handleUnsafeOperationInContainer(const Stmt *Operation,
 bool IsRelatedToDecl,
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b..ac01b285ae833 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 581434d33c5c9..ce77f459fab4f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12376,6 +12376,8 @@ def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
   "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe 
invocation of span::data}0">,
   InGroup, DefaultIgnore;
+def note_unsafe_buffer_printf_call : Note<
+  "%select{| change to 'snprintf' for explicit bounds checking | use 
'std::string::c_str' as pointer to guarantee null-termination}0">;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
 def note_unsafe_buffer_variable_fixit_group : Note<
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b..7402cf0f24c1e 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,19 +9,21 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
-#include "clang/AST/StmtVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
 #include 
 #include 
@@ -44

[clang] Warning Libc functions (PR #101583)

2024-08-02 Thread Ziqing Luo via cfe-commits




ziqingluo-90 wrote:

Though we probably don't need this warning anymore but this is really a great 
catch of wording issues!

https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Warning Libc functions (PR #101583)

2024-08-01 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From 0ccb5a8fc0855b2dfb948c4bb844e0394b5cedb0 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.
---
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 279 +-
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  80 +
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 4 files changed, 360 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b..ac01b285ae833 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b..926a78b6f0096 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,25 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
+#include 
 #include 
 #include 
 #include 
@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+
+  // A tiny name parser for unsafe libc function names with additional
+  // checks for `printf`s:
+  struct FuncNameMatch {
+const CallExpr *const Call;
+FuncNameMatch(const CallExpr *Call) : Call(Call) {}
+
+// For a name `S` in `PredefinedNames` or a member of the printf/scanf
+// family, define matching function names with `S` by the grammar below:
+//
+//  CoreName := S | S["wcs"/"str"]
+//  LibcName := CoreName | CoreName + "_s"
+//  MatchingName := "__builtin_" + LibcName  |
+//  "__builtin___" + LibcName + "_chk"   |
+//  "__asan_" + LibcName
+//
+// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
+bool matchName(StringRef FunName) {
+  // Try to match __builtin_:
+  if (FunName.starts_with("__builtin_"))
+

[clang] Warning Libc functions (PR #101583)

2024-08-01 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/101583

>From 0ccb5a8fc0855b2dfb948c4bb844e0394b5cedb0 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.
---
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 279 +-
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  80 +
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 4 files changed, 360 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b..ac01b285ae833 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b..926a78b6f0096 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,25 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
+#include 
 #include 
 #include 
 #include 
@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+
+  // A tiny name parser for unsafe libc function names with additional
+  // checks for `printf`s:
+  struct FuncNameMatch {
+const CallExpr *const Call;
+FuncNameMatch(const CallExpr *Call) : Call(Call) {}
+
+// For a name `S` in `PredefinedNames` or a member of the printf/scanf
+// family, define matching function names with `S` by the grammar below:
+//
+//  CoreName := S | S["wcs"/"str"]
+//  LibcName := CoreName | CoreName + "_s"
+//  MatchingName := "__builtin_" + LibcName  |
+//  "__builtin___" + LibcName + "_chk"   |
+//  "__asan_" + LibcName
+//
+// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
+bool matchName(StringRef FunName) {
+  // Try to match __builtin_:
+  if (FunName.starts_with("__builtin_"))
+

[clang] Warning Libc functions (PR #101583)

2024-08-01 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 edited 
https://github.com/llvm/llvm-project/pull/101583
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Warning Libc functions (PR #101583)

2024-08-01 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 created 
https://github.com/llvm/llvm-project/pull/101583

None

>From 0ccb5a8fc0855b2dfb948c4bb844e0394b5cedb0 Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Thu, 1 Aug 2024 16:36:27 -0700
Subject: [PATCH 1/2] [-Wunsafe-buffer-usage] Add warn on unsafe calls to libc
 functions

Warning about calls to libc functions involving buffer access.  Warned
functions are hardcoded by names.
---
 .../Analyses/UnsafeBufferUsageGadgets.def |   1 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  | 279 +-
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  80 +
 ...n-unsafe-buffer-usage-test-unreachable.cpp |   4 +-
 4 files changed, 360 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 242ad763ba62b..ac01b285ae833 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -38,6 +38,7 @@ WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_GADGET(UnsafeLibcFunctionCall)
 WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 866222380974b..926a78b6f0096 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,20 +9,25 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
+#include 
 #include 
 #include 
 #include 
@@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   return false;
 }
 
+AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) {
+  static const std::set PredefinedNames{
+  // numeric conversion:
+  "atof",
+  "atoi",
+  "atol",
+  "atoll",
+  "strtol",
+  "strtoll",
+  "strtoul",
+  "strtoull",
+  "strtof",
+  "strtod",
+  "strtold",
+  "strtoimax",
+  "strtoumax",
+  // "strfromf",  "strfromd", "strfroml", // C23?
+  // string manipulation:
+  "strcpy",
+  "strncpy",
+  "strlcpy",
+  "strcat",
+  "strncat",
+  "strlcat",
+  "strxfrm",
+  "strdup",
+  "strndup",
+  // string examination:
+  "strlen",
+  "strnlen",
+  "strcmp",
+  "strncmp",
+  "stricmp",
+  "strcasecmp",
+  "strcoll",
+  "strchr",
+  "strrchr",
+  "strspn",
+  "strcspn",
+  "strpbrk",
+  "strstr",
+  "strtok",
+  // "mem-" functions
+  "memchr",
+  "wmemchr",
+  "memcmp",
+  "wmemcmp",
+  "memcpy",
+  "memccpy",
+  "mempcpy",
+  "wmemcpy",
+  "memmove",
+  "wmemmove",
+  "memset",
+  "wmemset",
+  // IO:
+  "fread",
+  "fwrite",
+  "fgets",
+  "fgetws",
+  "gets",
+  "fputs",
+  "fputws",
+  "puts",
+  // others
+  "strerror_s",
+  "strerror_r",
+  "bcopy",
+  "bzero",
+  "bsearch",
+  "qsort",
+  };
+
+  // A tiny name parser for unsafe libc function names with additional
+  // checks for `printf`s:
+  struct FuncNameMatch {
+const CallExpr *const Call;
+FuncNameMatch(const CallExpr *Call) : Call(Call) {}
+
+// For a name `S` in `PredefinedNames` or a member of the printf/scanf
+// family, define matching function names with `S` by the grammar below:
+//
+//  CoreName := S | S["wcs"/"str"]
+//  LibcName := CoreName | CoreName + "_s"
+//  MatchingName := "__builtin_" + LibcName  |
+//  "__builtin___" + LibcName + "_chk"   |
+//  "__asan_" + LibcName
+//
+// (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.)
+bool matchName(StringRef FunName) {
+  // Try to match __builtin_:
+  if (FunName.starts_with("__builtin_"))
+  

[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-06-13 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 closed 
https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-06-13 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/92031

>From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 13 May 2024 13:31:21 -0700
Subject: [PATCH 1/7] fix safe buffer opt-out region serialization

---
 clang/include/clang/Lex/Preprocessor.h|  22 +++-
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/Lex/Preprocessor.cpp| 106 ++
 clang/lib/Serialization/ASTReader.cpp |  11 ++
 clang/lib/Serialization/ASTWriter.cpp |   7 ++
 clang/test/Modules/Inputs/SafeBuffers/base.h  |   9 ++
 .../SafeBuffers/safe_buffers_test.modulemap   |  10 ++
 .../Modules/Inputs/SafeBuffers/test_sub1.h|  20 
 .../Modules/Inputs/SafeBuffers/test_sub2.h|  11 ++
 clang/test/Modules/safe_buffers_optout.cpp|  39 +++
 ...unsafe-buffer-usage-pragma-pch-complex.cpp |  72 
 .../warn-unsafe-buffer-usage-pragma-pch.cpp   |  27 +
 12 files changed, 314 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h
 create mode 100644 
clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
 create mode 100644 clang/test/Modules/safe_buffers_optout.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index e89b4a2c5230e..8d6884ebe7597 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,11 +2883,15 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the 
start location of an never-closed region.
 
-  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
-  // translation unit. Each region is represented by a pair of start and end
-  // locations.  A region is "open" if its' start and end locations are
+  using SafeBufferOptOutMapTy =
+  SmallVector, 16>;
+  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
+  // translation unit. Each region is represented by a pair of start and
+  // end locations.  A region is "open" if its' start and end locations are
   // identical.
-  SmallVector, 8> 
SafeBufferOptOutMap;
+  SafeBufferOptOutMapTy SafeBufferOptOutMap;
+  // `SafeBufferOptOutMap`s of loaded files:
+  llvm::DenseMap LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2922,16 @@ class Preprocessor {
   ///  opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
 
+  /// \return a sequence of SourceLocations representing ordered opt-out 
regions
+  /// specified by
+  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
+  SmallVector serializeSafeBufferOptOutMap() const;
+
+  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
+  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
+  void setDeserializedSafeBufferOptOutMap(
+  const SmallVectorImpl &SrcLocSeqs);
+
 private:
   /// Helper functions to forward lexing to the actual lexer. They all share 
the
   /// same signature.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c1967..d1a0eba943039 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -775,6 +775,9 @@ enum ASTRecordTypes {
   /// Record code for lexical and visible block for delayed namespace in
   /// reduced BMI.
   DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
+
+  /// Record code for \#pragma clang unsafe_buffer_usage begin/end
+  PP_UNSAFE_BUFFER_USAGE = 69,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 0b70192743a39..6a41e3d4138aa 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -58,6 +58,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token 
&Identifier,
 }
 
 bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
-   const SourceLocation &Loc) const {
-  // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
-  auto FirstRegionEndingAfterLoc = llvm::partition_point(
-  SafeBufferOptOutMap,
-  [&SourceMgr,
-

[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-06-13 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/92031

>From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 13 May 2024 13:31:21 -0700
Subject: [PATCH 1/6] fix safe buffer opt-out region serialization

---
 clang/include/clang/Lex/Preprocessor.h|  22 +++-
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/Lex/Preprocessor.cpp| 106 ++
 clang/lib/Serialization/ASTReader.cpp |  11 ++
 clang/lib/Serialization/ASTWriter.cpp |   7 ++
 clang/test/Modules/Inputs/SafeBuffers/base.h  |   9 ++
 .../SafeBuffers/safe_buffers_test.modulemap   |  10 ++
 .../Modules/Inputs/SafeBuffers/test_sub1.h|  20 
 .../Modules/Inputs/SafeBuffers/test_sub2.h|  11 ++
 clang/test/Modules/safe_buffers_optout.cpp|  39 +++
 ...unsafe-buffer-usage-pragma-pch-complex.cpp |  72 
 .../warn-unsafe-buffer-usage-pragma-pch.cpp   |  27 +
 12 files changed, 314 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h
 create mode 100644 
clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
 create mode 100644 clang/test/Modules/safe_buffers_optout.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index e89b4a2c5230e..8d6884ebe7597 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,11 +2883,15 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the 
start location of an never-closed region.
 
-  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
-  // translation unit. Each region is represented by a pair of start and end
-  // locations.  A region is "open" if its' start and end locations are
+  using SafeBufferOptOutMapTy =
+  SmallVector, 16>;
+  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
+  // translation unit. Each region is represented by a pair of start and
+  // end locations.  A region is "open" if its' start and end locations are
   // identical.
-  SmallVector, 8> 
SafeBufferOptOutMap;
+  SafeBufferOptOutMapTy SafeBufferOptOutMap;
+  // `SafeBufferOptOutMap`s of loaded files:
+  llvm::DenseMap LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2922,16 @@ class Preprocessor {
   ///  opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
 
+  /// \return a sequence of SourceLocations representing ordered opt-out 
regions
+  /// specified by
+  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
+  SmallVector serializeSafeBufferOptOutMap() const;
+
+  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
+  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
+  void setDeserializedSafeBufferOptOutMap(
+  const SmallVectorImpl &SrcLocSeqs);
+
 private:
   /// Helper functions to forward lexing to the actual lexer. They all share 
the
   /// same signature.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c1967..d1a0eba943039 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -775,6 +775,9 @@ enum ASTRecordTypes {
   /// Record code for lexical and visible block for delayed namespace in
   /// reduced BMI.
   DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
+
+  /// Record code for \#pragma clang unsafe_buffer_usage begin/end
+  PP_UNSAFE_BUFFER_USAGE = 69,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 0b70192743a39..6a41e3d4138aa 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -58,6 +58,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token 
&Identifier,
 }
 
 bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
-   const SourceLocation &Loc) const {
-  // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
-  auto FirstRegionEndingAfterLoc = llvm::partition_point(
-  SafeBufferOptOutMap,
-  [&SourceMgr,
-

[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-06-07 Thread Ziqing Luo via cfe-commits


@@ -0,0 +1,41 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module 
-fmodule-name=safe_buffers_test_base -x c++ 
%S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
+// RUN: -o %t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module 
-fmodule-name=safe_buffers_test_module -x c++ 
%S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
+// RUN: -o %t/safe_buffers_test_module.pcm
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module 
-fmodule-name=safe_buffers_test_optout -x c++ 
%S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
+// RUN: -o %t/safe_buffers_test_optout.pcm 
-fmodule-file=%t/safe_buffers_test_base.pcm 
-fmodule-file=%t/safe_buffers_test_module.pcm -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify 
-fmodule-file=%t/safe_buffers_test_optout.pcm -I %S/Inputs/SafeBuffers %s\
+// RUN: -std=c++20 -Wunsafe-buffer-usage
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify 
-fmodules-cache-path=%t 
-fmodule-map-file=%S/Inputs/SafeBuffers/safe_buffers_test.modulemap -I 
%S/Inputs/SafeBuffers %s\
+// RUN: -x c++ -std=c++20 -Wunsafe-buffer-usage
+
+#include "test_sub1.h"

ziqingluo-90 wrote:

Thank you for the comment!   `split-file` is really neat!!

https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-06-07 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/92031

>From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 13 May 2024 13:31:21 -0700
Subject: [PATCH 1/5] fix safe buffer opt-out region serialization

---
 clang/include/clang/Lex/Preprocessor.h|  22 +++-
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/Lex/Preprocessor.cpp| 106 ++
 clang/lib/Serialization/ASTReader.cpp |  11 ++
 clang/lib/Serialization/ASTWriter.cpp |   7 ++
 clang/test/Modules/Inputs/SafeBuffers/base.h  |   9 ++
 .../SafeBuffers/safe_buffers_test.modulemap   |  10 ++
 .../Modules/Inputs/SafeBuffers/test_sub1.h|  20 
 .../Modules/Inputs/SafeBuffers/test_sub2.h|  11 ++
 clang/test/Modules/safe_buffers_optout.cpp|  39 +++
 ...unsafe-buffer-usage-pragma-pch-complex.cpp |  72 
 .../warn-unsafe-buffer-usage-pragma-pch.cpp   |  27 +
 12 files changed, 314 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h
 create mode 100644 
clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
 create mode 100644 clang/test/Modules/safe_buffers_optout.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index e89b4a2c5230e..8d6884ebe7597 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,11 +2883,15 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the 
start location of an never-closed region.
 
-  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
-  // translation unit. Each region is represented by a pair of start and end
-  // locations.  A region is "open" if its' start and end locations are
+  using SafeBufferOptOutMapTy =
+  SmallVector, 16>;
+  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
+  // translation unit. Each region is represented by a pair of start and
+  // end locations.  A region is "open" if its' start and end locations are
   // identical.
-  SmallVector, 8> 
SafeBufferOptOutMap;
+  SafeBufferOptOutMapTy SafeBufferOptOutMap;
+  // `SafeBufferOptOutMap`s of loaded files:
+  llvm::DenseMap LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2922,16 @@ class Preprocessor {
   ///  opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
 
+  /// \return a sequence of SourceLocations representing ordered opt-out 
regions
+  /// specified by
+  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
+  SmallVector serializeSafeBufferOptOutMap() const;
+
+  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
+  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
+  void setDeserializedSafeBufferOptOutMap(
+  const SmallVectorImpl &SrcLocSeqs);
+
 private:
   /// Helper functions to forward lexing to the actual lexer. They all share 
the
   /// same signature.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c1967..d1a0eba943039 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -775,6 +775,9 @@ enum ASTRecordTypes {
   /// Record code for lexical and visible block for delayed namespace in
   /// reduced BMI.
   DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
+
+  /// Record code for \#pragma clang unsafe_buffer_usage begin/end
+  PP_UNSAFE_BUFFER_USAGE = 69,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 0b70192743a39..6a41e3d4138aa 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -58,6 +58,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token 
&Identifier,
 }
 
 bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
-   const SourceLocation &Loc) const {
-  // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
-  auto FirstRegionEndingAfterLoc = llvm::partition_point(
-  SafeBufferOptOutMap,
-  [&SourceMgr,
-

[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-06-04 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 edited 
https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-06-04 Thread Ziqing Luo via cfe-commits

ziqingluo-90 wrote:

Learned more about how loaded files & ASTs are managed in clang.   Now opt-out 
regions in a loaded AST can span over multiple files (within one TU).  They now 
behave in a consistent way when they are from loaded files or local files.

https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-06-04 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/92031

>From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 13 May 2024 13:31:21 -0700
Subject: [PATCH 1/4] fix safe buffer opt-out region serialization

---
 clang/include/clang/Lex/Preprocessor.h|  22 +++-
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/Lex/Preprocessor.cpp| 106 ++
 clang/lib/Serialization/ASTReader.cpp |  11 ++
 clang/lib/Serialization/ASTWriter.cpp |   7 ++
 clang/test/Modules/Inputs/SafeBuffers/base.h  |   9 ++
 .../SafeBuffers/safe_buffers_test.modulemap   |  10 ++
 .../Modules/Inputs/SafeBuffers/test_sub1.h|  20 
 .../Modules/Inputs/SafeBuffers/test_sub2.h|  11 ++
 clang/test/Modules/safe_buffers_optout.cpp|  39 +++
 ...unsafe-buffer-usage-pragma-pch-complex.cpp |  72 
 .../warn-unsafe-buffer-usage-pragma-pch.cpp   |  27 +
 12 files changed, 314 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h
 create mode 100644 
clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
 create mode 100644 clang/test/Modules/safe_buffers_optout.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index e89b4a2c5230e..8d6884ebe7597 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,11 +2883,15 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the 
start location of an never-closed region.
 
-  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
-  // translation unit. Each region is represented by a pair of start and end
-  // locations.  A region is "open" if its' start and end locations are
+  using SafeBufferOptOutMapTy =
+  SmallVector, 16>;
+  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
+  // translation unit. Each region is represented by a pair of start and
+  // end locations.  A region is "open" if its' start and end locations are
   // identical.
-  SmallVector, 8> 
SafeBufferOptOutMap;
+  SafeBufferOptOutMapTy SafeBufferOptOutMap;
+  // `SafeBufferOptOutMap`s of loaded files:
+  llvm::DenseMap LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2922,16 @@ class Preprocessor {
   ///  opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
 
+  /// \return a sequence of SourceLocations representing ordered opt-out 
regions
+  /// specified by
+  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
+  SmallVector serializeSafeBufferOptOutMap() const;
+
+  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
+  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
+  void setDeserializedSafeBufferOptOutMap(
+  const SmallVectorImpl &SrcLocSeqs);
+
 private:
   /// Helper functions to forward lexing to the actual lexer. They all share 
the
   /// same signature.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c1967..d1a0eba943039 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -775,6 +775,9 @@ enum ASTRecordTypes {
   /// Record code for lexical and visible block for delayed namespace in
   /// reduced BMI.
   DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
+
+  /// Record code for \#pragma clang unsafe_buffer_usage begin/end
+  PP_UNSAFE_BUFFER_USAGE = 69,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 0b70192743a39..6a41e3d4138aa 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -58,6 +58,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token 
&Identifier,
 }
 
 bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
-   const SourceLocation &Loc) const {
-  // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
-  auto FirstRegionEndingAfterLoc = llvm::partition_point(
-  SafeBufferOptOutMap,
-  [&SourceMgr,
-

[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-05-31 Thread Ziqing Luo via cfe-commits

ziqingluo-90 wrote:

Just realized how stupid I am (O_o)---the current implementation only compares 
source locations with opt-out regions that are in the same source file, for all 
source locations from loaded files.   This is not going to work for cases like 
below.
```
// in a pch header:
#pragma clang unsafe_buffer_usage begin
#include "other_header.h"  // -Wunsafe-buffer-usage in "other_header.h" is NOT 
suppressed !
#pragma clang unsafe_buffer_usage end
```

Add [WIP] back to this PR.  More work is needed.

https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-05-31 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 edited 
https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Fix false positives for constant cases (PR #92432)

2024-05-20 Thread Ziqing Luo via cfe-commits


@@ -420,25 +420,64 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   //already duplicated
   //  - call both from Sema and from here
 
-  const auto *BaseDRE =
-  dyn_cast(Node.getBase()->IgnoreParenImpCasts());
-  if (!BaseDRE)
+  if (const auto *BaseDRE =
+  dyn_cast(Node.getBase()->IgnoreParenImpCasts())) {
+if (!BaseDRE->getDecl())
+  return false;
+if (const auto *CATy = Finder->getASTContext().getAsConstantArrayType(
+  BaseDRE->getDecl()->getType())) {
+  if (const auto *IdxLit = dyn_cast(Node.getIdx())) {
+const APInt ArrIdx = IdxLit->getValue();
+// FIXME: ArrIdx.isNegative() we could immediately emit an error as 
that's a
+// bug
+if (ArrIdx.isNonNegative() &&
+ArrIdx.getLimitedValue() < CATy->getLimitedSize())
+  return true;
+  }
+}
+  }
+
+  if (const auto *BaseSL =
+  dyn_cast(Node.getBase()->IgnoreParenImpCasts())) {
+if (const auto *CATy = Finder->getASTContext().getAsConstantArrayType(
+  BaseSL->getType())) {
+  if (const auto *IdxLit = dyn_cast(Node.getIdx())) {
+  const APInt ArrIdx = IdxLit->getValue();
+  // FIXME: ArrIdx.isNegative() we could immediately emit an error as 
that's a
+  // bug
+  if (ArrIdx.isNonNegative() &&
+  ArrIdx.getLimitedValue() < CATy->getLimitedSize())
+return true;
+  }
+}
+  }
+
+  return false;
+}
+
+AST_MATCHER(BinaryOperator, isSafePtrArithmetic) {
+  if (Node.getOpcode() != BinaryOperatorKind::BO_Add)
 return false;
-  if (!BaseDRE->getDecl())
+
+  const auto *LHSDRE =

ziqingluo-90 wrote:

For both the AST matchers, I'm just curious if we really need to make sure the 
left-hand side is a DRE?  Could we just try to test if its' type is a constant 
array type regardless of its' expression kind?

https://github.com/llvm/llvm-project/pull/92432
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-05-16 Thread Ziqing Luo via cfe-commits


@@ -1551,6 +1567,58 @@ bool 
Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
   return InSafeBufferOptOutRegion;
 }
 
+SmallVector
+Preprocessor::serializeSafeBufferOptOutMap() const {
+  assert(!InSafeBufferOptOutRegion &&
+ "Attempt to serialize safe buffer opt-out regions before file being "
+ "completely preprocessed");
+
+  SmallVector SrcSeq;
+
+  for (const auto &[begin, end] : SafeBufferOptOutMap) {
+SrcSeq.push_back(begin);
+SrcSeq.push_back(end);
+  }
+  // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
+  // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
+  // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
+  // It means that for each loading pch/module m, it just needs to load m's own
+  // `SafeBufferOptOutMap`.
+  return SrcSeq;
+}
+
+void Preprocessor::setDeserializedSafeBufferOptOutMap(
+const SmallVectorImpl &SourceLocations) {
+  auto It = SourceLocations.begin();
+
+  assert(SourceLocations.size() % 2 == 0 &&
+ "ill-formed SourceLocation sequence");
+  while (It != SourceLocations.end()) {
+SourceLocation begin = *It++;
+SourceLocation end = *It++;
+SourceLocation FileLoc = SourceMgr.getFileLoc(begin);
+FileID FID = SourceMgr.getDecomposedLoc(FileLoc).first;
+
+if (FID.isInvalid()) {
+  // I suppose this should not happen:
+  assert(false && "Attempted to read a safe buffer opt-out region whose "
+  "begin location is associated to an invalid File ID.");
+  break;
+}
+assert(!SourceMgr.isLocalFileID(FID) && "Expected a pch/module file");
+// Here we assume that
+// `SourceMgr.getFileLoc(begin) == SourceMgr.getFileLoc(end)`.
+// Though it may not hold in very rare and strange cases, i.e., a pair of

ziqingluo-90 wrote:

yes, we can emit front-end errors.  See this newly added test: 
https://github.com/llvm/llvm-project/pull/92031/files#diff-697faf822726557e5d62bcf106fc6cb03ff333b30b0a42ec192d93e6e12fd8fa

https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-05-16 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/92031

>From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 13 May 2024 13:31:21 -0700
Subject: [PATCH 1/3] fix safe buffer opt-out region serialization

---
 clang/include/clang/Lex/Preprocessor.h|  22 +++-
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/Lex/Preprocessor.cpp| 106 ++
 clang/lib/Serialization/ASTReader.cpp |  11 ++
 clang/lib/Serialization/ASTWriter.cpp |   7 ++
 clang/test/Modules/Inputs/SafeBuffers/base.h  |   9 ++
 .../SafeBuffers/safe_buffers_test.modulemap   |  10 ++
 .../Modules/Inputs/SafeBuffers/test_sub1.h|  20 
 .../Modules/Inputs/SafeBuffers/test_sub2.h|  11 ++
 clang/test/Modules/safe_buffers_optout.cpp|  39 +++
 ...unsafe-buffer-usage-pragma-pch-complex.cpp |  72 
 .../warn-unsafe-buffer-usage-pragma-pch.cpp   |  27 +
 12 files changed, 314 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h
 create mode 100644 
clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
 create mode 100644 clang/test/Modules/safe_buffers_optout.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index e89b4a2c5230e..8d6884ebe7597 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,11 +2883,15 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the 
start location of an never-closed region.
 
-  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
-  // translation unit. Each region is represented by a pair of start and end
-  // locations.  A region is "open" if its' start and end locations are
+  using SafeBufferOptOutMapTy =
+  SmallVector, 16>;
+  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
+  // translation unit. Each region is represented by a pair of start and
+  // end locations.  A region is "open" if its' start and end locations are
   // identical.
-  SmallVector, 8> 
SafeBufferOptOutMap;
+  SafeBufferOptOutMapTy SafeBufferOptOutMap;
+  // `SafeBufferOptOutMap`s of loaded files:
+  llvm::DenseMap LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2922,16 @@ class Preprocessor {
   ///  opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
 
+  /// \return a sequence of SourceLocations representing ordered opt-out 
regions
+  /// specified by
+  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
+  SmallVector serializeSafeBufferOptOutMap() const;
+
+  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
+  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
+  void setDeserializedSafeBufferOptOutMap(
+  const SmallVectorImpl &SrcLocSeqs);
+
 private:
   /// Helper functions to forward lexing to the actual lexer. They all share 
the
   /// same signature.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c1967..d1a0eba943039 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -775,6 +775,9 @@ enum ASTRecordTypes {
   /// Record code for lexical and visible block for delayed namespace in
   /// reduced BMI.
   DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
+
+  /// Record code for \#pragma clang unsafe_buffer_usage begin/end
+  PP_UNSAFE_BUFFER_USAGE = 69,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 0b70192743a39..6a41e3d4138aa 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -58,6 +58,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token 
&Identifier,
 }
 
 bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
-   const SourceLocation &Loc) const {
-  // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
-  auto FirstRegionEndingAfterLoc = llvm::partition_point(
-  SafeBufferOptOutMap,
-  [&SourceMgr,
-

[clang] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-05-16 Thread Ziqing Luo via cfe-commits


@@ -1551,6 +1567,58 @@ bool 
Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
   return InSafeBufferOptOutRegion;
 }
 
+SmallVector
+Preprocessor::serializeSafeBufferOptOutMap() const {
+  assert(!InSafeBufferOptOutRegion &&

ziqingluo-90 wrote:

Oh,  I'm sorry,  they are not warnings, they are errors: 
https://github.com/llvm/llvm-project/blob/c79690040acf5bb3d857558b0878db47f7f23dc3/clang/lib/Lex/Pragma.cpp#L1265

https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-05-15 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/92031

>From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 13 May 2024 13:31:21 -0700
Subject: [PATCH 1/2] fix safe buffer opt-out region serialization

---
 clang/include/clang/Lex/Preprocessor.h|  22 +++-
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/Lex/Preprocessor.cpp| 106 ++
 clang/lib/Serialization/ASTReader.cpp |  11 ++
 clang/lib/Serialization/ASTWriter.cpp |   7 ++
 clang/test/Modules/Inputs/SafeBuffers/base.h  |   9 ++
 .../SafeBuffers/safe_buffers_test.modulemap   |  10 ++
 .../Modules/Inputs/SafeBuffers/test_sub1.h|  20 
 .../Modules/Inputs/SafeBuffers/test_sub2.h|  11 ++
 clang/test/Modules/safe_buffers_optout.cpp|  39 +++
 ...unsafe-buffer-usage-pragma-pch-complex.cpp |  72 
 .../warn-unsafe-buffer-usage-pragma-pch.cpp   |  27 +
 12 files changed, 314 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h
 create mode 100644 
clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
 create mode 100644 clang/test/Modules/safe_buffers_optout.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index e89b4a2c5230e..8d6884ebe7597 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,11 +2883,15 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the 
start location of an never-closed region.
 
-  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
-  // translation unit. Each region is represented by a pair of start and end
-  // locations.  A region is "open" if its' start and end locations are
+  using SafeBufferOptOutMapTy =
+  SmallVector, 16>;
+  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
+  // translation unit. Each region is represented by a pair of start and
+  // end locations.  A region is "open" if its' start and end locations are
   // identical.
-  SmallVector, 8> 
SafeBufferOptOutMap;
+  SafeBufferOptOutMapTy SafeBufferOptOutMap;
+  // `SafeBufferOptOutMap`s of loaded files:
+  llvm::DenseMap LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2922,16 @@ class Preprocessor {
   ///  opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
 
+  /// \return a sequence of SourceLocations representing ordered opt-out 
regions
+  /// specified by
+  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
+  SmallVector serializeSafeBufferOptOutMap() const;
+
+  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
+  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
+  void setDeserializedSafeBufferOptOutMap(
+  const SmallVectorImpl &SrcLocSeqs);
+
 private:
   /// Helper functions to forward lexing to the actual lexer. They all share 
the
   /// same signature.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c1967..d1a0eba943039 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -775,6 +775,9 @@ enum ASTRecordTypes {
   /// Record code for lexical and visible block for delayed namespace in
   /// reduced BMI.
   DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
+
+  /// Record code for \#pragma clang unsafe_buffer_usage begin/end
+  PP_UNSAFE_BUFFER_USAGE = 69,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 0b70192743a39..6a41e3d4138aa 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -58,6 +58,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token 
&Identifier,
 }
 
 bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
-   const SourceLocation &Loc) const {
-  // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
-  auto FirstRegionEndingAfterLoc = llvm::partition_point(
-  SafeBufferOptOutMap,
-  [&SourceMgr,
-

[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-05-15 Thread Ziqing Luo via cfe-commits

ziqingluo-90 wrote:

> I'm very happy that this is going somewhere! Everything makes sense to me but 
> I also don't know a lot about this stuff.
> 
> > During serialization, it only serializes regions of the current translation 
> > unit. Regions from loaded files are not serialized.
> 
> Hmm how does this work? When a precompiled header includes another 
> precompiled header, do they both get loaded independently and then you get to 
> combine the maps?

For loading multiple PCHs, it is a chain.  Suppose `TU` includes pch `B` and 
`B` includes pch `C`.   So one would compile and emit PCHs starting from `C`, 
then do it for `B` with `C` being loaded.  When compile `A`, it includes pch 
`B` from the command line and clang actually loads `C` and `B` separately.

It is the same story for modules, except that modules form a DAG instead of a 
chain.   Clang will load each module file in an order with respect to the DAG.  
 So for each module/PCH emitted by clang,  only their own opt-out regions need 
to be serialized.

https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-05-15 Thread Ziqing Luo via cfe-commits


@@ -1551,6 +1567,58 @@ bool 
Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
   return InSafeBufferOptOutRegion;
 }
 
+SmallVector
+Preprocessor::serializeSafeBufferOptOutMap() const {
+  assert(!InSafeBufferOptOutRegion &&

ziqingluo-90 wrote:

It's a warning.

https://github.com/llvm/llvm-project/pull/92031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

2024-05-13 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 created 
https://github.com/llvm/llvm-project/pull/92031

A pair of `#pragma clang unsafe_buffer_usage begin/end` pragmas marks a 
warning-opt-out region.  The begin and end locations (opt-out regions) are 
stored in preprocessor instances (PP) and will be queried by the 
`-Wunsafe-buffer-usage` analyzer.

The commit adds serialization and de-serialization implementations for the 
stored regions.  Basically,  the serialized representation of the regions of a 
PP is a (ordered) sequence of source location encodings.   During 
serialization, it only serializes regions of the current translation unit.  
Regions from loaded files are not serialized.  During de-serialization, regions 
from loaded files are stored by their source files.   When later one queries if 
a loaded location `l` is in an opt-out region,  PP looks up regions by the 
source file of `l`.  

The reported issue at upstream: 
https://github.com/llvm/llvm-project/issues/90501
rdar://124035402

>From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 13 May 2024 13:31:21 -0700
Subject: [PATCH] fix safe buffer opt-out region serialization

---
 clang/include/clang/Lex/Preprocessor.h|  22 +++-
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/Lex/Preprocessor.cpp| 106 ++
 clang/lib/Serialization/ASTReader.cpp |  11 ++
 clang/lib/Serialization/ASTWriter.cpp |   7 ++
 clang/test/Modules/Inputs/SafeBuffers/base.h  |   9 ++
 .../SafeBuffers/safe_buffers_test.modulemap   |  10 ++
 .../Modules/Inputs/SafeBuffers/test_sub1.h|  20 
 .../Modules/Inputs/SafeBuffers/test_sub2.h|  11 ++
 clang/test/Modules/safe_buffers_optout.cpp|  39 +++
 ...unsafe-buffer-usage-pragma-pch-complex.cpp |  72 
 .../warn-unsafe-buffer-usage-pragma-pch.cpp   |  27 +
 12 files changed, 314 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h
 create mode 100644 
clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
 create mode 100644 clang/test/Modules/safe_buffers_optout.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index e89b4a2c5230e..8d6884ebe7597 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,11 +2883,15 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the 
start location of an never-closed region.
 
-  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
-  // translation unit. Each region is represented by a pair of start and end
-  // locations.  A region is "open" if its' start and end locations are
+  using SafeBufferOptOutMapTy =
+  SmallVector, 16>;
+  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
+  // translation unit. Each region is represented by a pair of start and
+  // end locations.  A region is "open" if its' start and end locations are
   // identical.
-  SmallVector, 8> 
SafeBufferOptOutMap;
+  SafeBufferOptOutMapTy SafeBufferOptOutMap;
+  // `SafeBufferOptOutMap`s of loaded files:
+  llvm::DenseMap LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2922,16 @@ class Preprocessor {
   ///  opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
 
+  /// \return a sequence of SourceLocations representing ordered opt-out 
regions
+  /// specified by
+  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
+  SmallVector serializeSafeBufferOptOutMap() const;
+
+  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
+  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
+  void setDeserializedSafeBufferOptOutMap(
+  const SmallVectorImpl &SrcLocSeqs);
+
 private:
   /// Helper functions to forward lexing to the actual lexer. They all share 
the
   /// same signature.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c1967..d1a0eba943039 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -775,6 +775,9 @@ enum ASTRecordTypes {
   /// Record code for lexical and visible block for delayed namespace in
   /// reduced BMI.
   DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
+
+  /// Record code for \#pragma clang unsafe_buffer_usage begin/end
+  PP_UNSAFE_BUFFER_USAGE = 69,
 };
 
 /// Record types used within a source m

[clang] Add handling of the `-nostdlibinc` option to ccc-analyzer (PR #88017)

2024-04-09 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 closed 
https://github.com/llvm/llvm-project/pull/88017
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Add handling of the `-nostdlibinc` option to ccc-analyzer (PR #88017)

2024-04-08 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 created 
https://github.com/llvm/llvm-project/pull/88017

Compiler options recognizable to ccc-analyzer are stored in maps.  An option 
missing in the map will be dropped by ccc-analyzer.  This causes a build error 
in one of our projects that only happens in scan-build but not regular build, 
because ccc-analyzer do not recognize `-nostdlibinc`.

This commit adds the option to the map.

>From 188398778993244223638e8433efe2ce703772dd Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Mon, 8 Apr 2024 10:25:43 -0700
Subject: [PATCH] Add handling of the `-nostdlibinc` option to ccc-analyzer

Compiler options recognizable to ccc-analyzer are stored in maps.  An
option missing in the map will be dropped by ccc-analyzer.  This
causes a build error in one of our projects that only happens in
scan-build but not regular build, because ccc-analyzer do not
recognize `-nostdlibinc`.

This commit adds the option to the map.

rdar://126082053
---
 clang/tools/scan-build/libexec/ccc-analyzer | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/tools/scan-build/libexec/ccc-analyzer 
b/clang/tools/scan-build/libexec/ccc-analyzer
index 60796a543fcd06..c5588814e8f0de 100755
--- a/clang/tools/scan-build/libexec/ccc-analyzer
+++ b/clang/tools/scan-build/libexec/ccc-analyzer
@@ -361,6 +361,7 @@ sub Analyze {
 
 my %CompileOptionMap = (
   '-nostdinc' => 0,
+  '-nostdlibinc' => 0,
   '-include' => 1,
   '-idirafter' => 1,
   '-imacros' => 1,

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage][NFC] clang-format UnsafeBufferUsage.cpp (PR #82027)

2024-02-27 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 approved this pull request.

LGTM!  Thanks for re-formatting the code.

https://github.com/llvm/llvm-project/pull/82027
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Minimize fixit range for pointer variables (PR #81935)

2024-02-16 Thread Ziqing Luo via cfe-commits


@@ -2326,15 +2312,21 @@ static FixItList fixLocalVarDeclWithSpan(const VarDecl 
*D, ASTContext &Ctx,
   return {};
 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts->begin()),
   std::make_move_iterator(InitFixIts->end()));
-// If the declaration has the form `T *ident = init`, we want to replace
-// `T *ident = ` with `std::span ident`:
-EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1);
   }
-  SS << " " << IdentText->str();
+  // For declaration of the form `T * ident = init;`, we want to replace
+  // `T * ` with `std::span`.
+  // We ignore CV-qualifiers so for `T * const ident;` we also want to replace
+  // just `T *` with `std::span`.
+  const SourceLocation EndLocForReplacement = D->getTypeSpecEndLoc();
   if (!EndLocForReplacement.isValid()) {
 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration");
 return {};
   }
+  // The only exception is that for `T *ident` we'll add a single space between
+  // "std::span" and "ident".
+  if (EndLocForReplacement.getLocWithOffset(1) == getVarDeclIdentifierLoc(D))

ziqingluo-90 wrote:

In case the identifier is a macro expansion, e.g., 
```
#define IDENT ident
T *IDENT;
```
, the if-condition will evaluate to false because the files of the two source 
locations are different.  
But this is rare and the fix-it is still correct.

https://github.com/llvm/llvm-project/pull/81935
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Minimize fixit range for pointer variables (PR #81935)

2024-02-16 Thread Ziqing Luo via cfe-commits


@@ -2326,15 +2312,21 @@ static FixItList fixLocalVarDeclWithSpan(const VarDecl 
*D, ASTContext &Ctx,
   return {};
 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts->begin()),
   std::make_move_iterator(InitFixIts->end()));
-// If the declaration has the form `T *ident = init`, we want to replace
-// `T *ident = ` with `std::span ident`:
-EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1);
   }
-  SS << " " << IdentText->str();
+  // For declaration of the form `T * ident = init;`, we want to replace
+  // `T * ` with `std::span`.
+  // We ignore CV-qualifiers so for `T * const ident;` we also want to replace
+  // just `T *` with `std::span`.
+  const SourceLocation EndLocForReplacement = D->getTypeSpecEndLoc();

ziqingluo-90 wrote:

This looks a correct optimization to me!  I probably didn't realize the 
existence of `getTypeSpecEndLoc()` when I did this.

https://github.com/llvm/llvm-project/pull/81935
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Minimize fixit range for pointer variables (PR #81935)

2024-02-16 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 approved this pull request.

LGTM!

https://github.com/llvm/llvm-project/pull/81935
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Minimize fixit range for pointer variables (PR #81935)

2024-02-16 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 edited 
https://github.com/llvm/llvm-project/pull/81935
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Fixits for array decayed to pointer (PR #80347)

2024-02-13 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 approved this pull request.

LGTM!
(We probably do not need to use `CHECK-DAG` at most places in our tests since 
we have fixed the non-deterministic issue, but it's irrelevant to this PR.)

https://github.com/llvm/llvm-project/pull/80347
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)

2024-02-07 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 approved this pull request.

I left all my comments here.  I didn't think quite through for some of them so 
feel free to ignore if they don't make sense to you.  

Other parts LGTM!

https://github.com/llvm/llvm-project/pull/80084
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)

2024-02-07 Thread Ziqing Luo via cfe-commits


@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD,
   return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler);
 }
 
+static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
+ UnsafeBufferUsageHandler &Handler) {
+  FixItList FixIts{};
+
+  if (auto CAT = dyn_cast(D->getType())) {
+const QualType &ArrayEltT = CAT->getElementType();
+assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
+// FIXME: support multi-dimensional arrays
+if (isa(ArrayEltT))
+  return {};
+
+const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D);
+
+// Get the spelling of the element type as written in the source file
+// (including macros, etc.).
+auto MaybeElemTypeTxt =
+getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(),
+ Ctx.getLangOpts());
+if (!MaybeElemTypeTxt)
+  return {};
+const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim();
+
+// Find the '[' token.
+std::optional NextTok = Lexer::findNextToken(
+IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts());
+while (NextTok && !NextTok->is(tok::l_square))
+  NextTok = Lexer::findNextToken(NextTok->getLocation(),
+ Ctx.getSourceManager(), 
Ctx.getLangOpts());
+if (!NextTok)
+  return {};
+const SourceLocation LSqBracketLoc = NextTok->getLocation();
+
+// Get the spelling of the array size as written in the source file
+// (including macros, etc.).
+auto MaybeArraySizeTxt = getRangeText(
+{LSqBracketLoc.getLocWithOffset(1), D->getTypeSpecEndLoc()},
+Ctx.getSourceManager(), Ctx.getLangOpts());
+if (!MaybeArraySizeTxt)
+  return {};
+const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim();
+if (ArraySizeTxt.empty()) {
+  // FIXME: Support array size getting determined from the initializer.
+  // Examples:
+  //int arr1[] = {0, 1, 2};
+  //int arr2{3, 4, 5};
+  // We might be able to preserve the non-specified size with `auto` and
+  // `std::to_array`:
+  //auto arr1 = std::to_array({0, 1, 2});
+  return {};
+}
+
+std::optional IdentText =
+getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts());
+
+if (!IdentText) {
+  DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
+  return {};
+}
+
+SmallString<32> Replacement;
+raw_svector_ostream OS(Replacement);
+OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> "
+   << IdentText->str();
+
+FixIts.push_back(FixItHint::CreateReplacement(
+SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, OS.str()));

ziqingluo-90 wrote:

and this maybe?
```
typedef int ArrayT[10];
ArrayT a;
```

https://github.com/llvm/llvm-project/pull/80084
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)

2024-02-07 Thread Ziqing Luo via cfe-commits


@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD,
   return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler);
 }
 
+static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
+ UnsafeBufferUsageHandler &Handler) {
+  FixItList FixIts{};
+
+  if (auto CAT = dyn_cast(D->getType())) {

ziqingluo-90 wrote:

nitpick:  can we replace this check with an assertion?   Because it is already 
checked at the caller:
```
 case FixitStrategy::Kind::Array: {
if (VD->isLocalVarDecl() && isa(VD->getType()))
  return fixVariableWithArray(VD, Tracker, Ctx, Handler);
```

https://github.com/llvm/llvm-project/pull/80084
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)

2024-02-07 Thread Ziqing Luo via cfe-commits


@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD,
   return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler);
 }
 
+static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
+ UnsafeBufferUsageHandler &Handler) {
+  FixItList FixIts{};
+
+  if (auto CAT = dyn_cast(D->getType())) {
+const QualType &ArrayEltT = CAT->getElementType();
+assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
+// FIXME: support multi-dimensional arrays
+if (isa(ArrayEltT))
+  return {};
+
+const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D);
+
+// Get the spelling of the element type as written in the source file
+// (including macros, etc.).
+auto MaybeElemTypeTxt =
+getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(),
+ Ctx.getLangOpts());
+if (!MaybeElemTypeTxt)
+  return {};
+const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim();
+
+// Find the '[' token.
+std::optional NextTok = Lexer::findNextToken(
+IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts());
+while (NextTok && !NextTok->is(tok::l_square))
+  NextTok = Lexer::findNextToken(NextTok->getLocation(),
+ Ctx.getSourceManager(), 
Ctx.getLangOpts());
+if (!NextTok)
+  return {};
+const SourceLocation LSqBracketLoc = NextTok->getLocation();
+
+// Get the spelling of the array size as written in the source file
+// (including macros, etc.).
+auto MaybeArraySizeTxt = getRangeText(
+{LSqBracketLoc.getLocWithOffset(1), D->getTypeSpecEndLoc()},
+Ctx.getSourceManager(), Ctx.getLangOpts());
+if (!MaybeArraySizeTxt)
+  return {};
+const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim();
+if (ArraySizeTxt.empty()) {
+  // FIXME: Support array size getting determined from the initializer.
+  // Examples:
+  //int arr1[] = {0, 1, 2};
+  //int arr2{3, 4, 5};
+  // We might be able to preserve the non-specified size with `auto` and
+  // `std::to_array`:
+  //auto arr1 = std::to_array({0, 1, 2});
+  return {};
+}
+
+std::optional IdentText =
+getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts());
+
+if (!IdentText) {
+  DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
+  return {};
+}
+
+SmallString<32> Replacement;
+raw_svector_ostream OS(Replacement);
+OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> "
+   << IdentText->str();
+
+FixIts.push_back(FixItHint::CreateReplacement(
+SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, OS.str()));

ziqingluo-90 wrote:

I'm wondering if it is possible to declare a constant array in a way that the 
type specifier is completely at the left-hand side of the identifier?   Like 
the form `T a;` while `T` represents a constant array type.   

How about this?  
```
int a[10];
decltype(a) b;
```

https://github.com/llvm/llvm-project/pull/80084
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)

2024-02-07 Thread Ziqing Luo via cfe-commits


@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD,
   return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler);
 }
 
+static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
+ UnsafeBufferUsageHandler &Handler) {
+  FixItList FixIts{};
+
+  if (auto CAT = dyn_cast(D->getType())) {
+const QualType &ArrayEltT = CAT->getElementType();
+assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
+// FIXME: support multi-dimensional arrays
+if (isa(ArrayEltT))
+  return {};
+
+const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D);
+
+// Get the spelling of the element type as written in the source file
+// (including macros, etc.).
+auto MaybeElemTypeTxt =
+getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(),
+ Ctx.getLangOpts());
+if (!MaybeElemTypeTxt)
+  return {};
+const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim();
+
+// Find the '[' token.
+std::optional NextTok = Lexer::findNextToken(
+IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts());
+while (NextTok && !NextTok->is(tok::l_square))
+  NextTok = Lexer::findNextToken(NextTok->getLocation(),
+ Ctx.getSourceManager(), 
Ctx.getLangOpts());
+if (!NextTok)
+  return {};
+const SourceLocation LSqBracketLoc = NextTok->getLocation();
+
+// Get the spelling of the array size as written in the source file
+// (including macros, etc.).
+auto MaybeArraySizeTxt = getRangeText(
+{LSqBracketLoc.getLocWithOffset(1), D->getTypeSpecEndLoc()},
+Ctx.getSourceManager(), Ctx.getLangOpts());
+if (!MaybeArraySizeTxt)
+  return {};
+const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim();
+if (ArraySizeTxt.empty()) {
+  // FIXME: Support array size getting determined from the initializer.

ziqingluo-90 wrote:

In this case, maybe we can get the concrete size value from the array type 
directly since there is no spelling text for us to preserve?

https://github.com/llvm/llvm-project/pull/80084
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)

2024-02-07 Thread Ziqing Luo via cfe-commits


@@ -2495,10 +2471,100 @@ static FixItList fixVariableWithSpan(const VarDecl *VD,
   return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler);
 }
 
+static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
+ UnsafeBufferUsageHandler &Handler) {
+  FixItList FixIts{};
+
+  if (auto CAT = dyn_cast(D->getType())) {
+const QualType &ArrayEltT = CAT->getElementType();
+assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
+// FIXME: support multi-dimensional arrays
+if (isa(ArrayEltT))

ziqingluo-90 wrote:

is it possible that the element type is an array type but not a constant array 
type?

https://github.com/llvm/llvm-project/pull/80084
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Fix debug notes for unclaimed DREs (PR #80787)

2024-02-06 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 approved this pull request.

LGTM!  
I think I did something similar locally when I collected data of unclaimed DREs 
on those big adopter projects.   So I guess the data that we used to find major 
missing patterns for fix-its is still valid.

https://github.com/llvm/llvm-project/pull/80787
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)

2024-02-02 Thread Ziqing Luo via cfe-commits


@@ -2495,10 +2470,97 @@ static FixItList fixVariableWithSpan(const VarDecl *VD,
   return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler);
 }
 
+static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
+ UnsafeBufferUsageHandler &Handler) {
+  FixItList FixIts{};
+
+  if (auto CAT = dyn_cast(D->getType())) {
+const QualType &ArrayEltT = CAT->getElementType();
+assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
+
+const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D);
+
+// Get the spelling of the element type as written in the source file
+// (including macros, etc.).
+auto MaybeElemTypeTxt =
+getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(),
+ Ctx.getLangOpts());
+if (!MaybeElemTypeTxt)
+  return {};
+const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim();
+
+// Find the '[' token.
+std::optional NextTok = Lexer::findNextToken(
+IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts());
+while (NextTok && !NextTok->is(tok::l_square))
+  NextTok = Lexer::findNextToken(NextTok->getLocation(),
+ Ctx.getSourceManager(), 
Ctx.getLangOpts());
+if (!NextTok)
+  return {};
+const SourceLocation LSqBracketLoc = NextTok->getLocation();
+
+// Get the spelling of the array size as written in the source file
+// (including macros, etc.).

ziqingluo-90 wrote:

I wish we could have a more elegant way to get the spelling of the array size.  
I feel like `TypeLoc` is suppose to offer it but it does not behave as I expect 
in this case.  (`TypeLoc` also disappointed me in dealing with cv-qualifiers. 
`¯\_(ツ)_/¯`) 

https://github.com/llvm/llvm-project/pull/80084
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Introduce std::array fixits (PR #80084)

2024-02-02 Thread Ziqing Luo via cfe-commits


@@ -0,0 +1,164 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage \
+// RUN:-fsafe-buffer-usage-suggestions \
+// RUN:-fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+typedef int * Int_ptr_t;
+typedef int Int_t;
+
+void simple(unsigned idx) {
+  int buffer[10];
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array 
buffer"
+  buffer[idx] = 0;
+}
+
+void whitespace_in_declaration(unsigned idx) {
+  int  buffer_w   [   10 ];
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:35}:"std::array 
buffer_w"
+  buffer_w[idx] = 0;
+}
+
+void comments_in_declaration(unsigned idx) {
+  int   /* [A] */   buffer_w  /* [B] */ [  /* [C] */ 10 /* [D] */  ] ;
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:69}:"std::array buffer_w"
+  buffer_w[idx] = 0;
+}
+
+void initializer(unsigned idx) {
+  int buffer[3] = {0};
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:16}:"std::array 
buffer"
+
+  int buffer2[3] = {0, 1, 2};
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array 
buffer2"
+
+  buffer[idx] = 0;
+  buffer2[idx] = 0;
+}
+
+void auto_size(unsigned idx) {
+  int buffer[] = {0, 1, 2};
+// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+// FIXME: implement support
+
+  buffer[idx] = 0;
+}
+
+void universal_initialization(unsigned idx) {
+  int buffer[] {0, 1, 2};
+// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+// FIXME: implement support
+
+  buffer[idx] = 0;
+}
+
+void multi_decl1(unsigned idx) {
+  int a, buffer[10];
+// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+// FIXME: implement support
+
+  buffer[idx] = 0;
+}
+
+void multi_decl2(unsigned idx) {
+  int buffer[10], b;
+// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+// FIXME: implement support
+
+  buffer[idx] = 0;
+}
+
+void local_array_ptr_to_const(unsigned idx, const int*& a) {
+  const int * buffer[10] = {a};
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:25}:"std::array buffer"
+  a = buffer[idx];
+}
+
+void local_array_const_ptr(unsigned idx, int*& a) {
+  int * const buffer[10] = {a};
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:25}:"std::array buffer"
+
+  a = buffer[idx];
+}
+
+void local_array_const_ptr_via_typedef(unsigned idx, int*& a) {
+  typedef int * const my_const_ptr;
+  my_const_ptr buffer[10] = {a};
+// CHECK: 
fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:26}:"std::array 
buffer"
+
+  a = buffer[idx];
+}
+
+void local_array_const_ptr_to_const(unsigned idx, const int*& a) {
+  const int * const buffer[10] = {a};
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:31}:"std::array buffer"
+
+  a = buffer[idx];
+
+}
+
+template
+void unsupported_local_array_in_template(unsigned idx) {
+  T buffer[10];
+// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:.*-[[@LINE-1]]:.*}
+  buffer[idx] = 0;
+}
+// Instantiate the template function to force its analysis.
+template void unsupported_local_array_in_template(unsigned);
+
+typedef unsigned int my_uint;
+void typedef_as_elem_type(unsigned idx) {
+  my_uint buffer[10];
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:21}:"std::array buffer"
+  buffer[idx] = 0;
+}
+
+void macro_as_elem_type(unsigned idx) {
+#define MY_INT int
+  MY_INT buffer[10];
+// CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:.*-[[@LINE-1]]:.*}
+// FIXME: implement support
+
+  buffer[idx] = 0;
+#undef MY_INT
+}
+
+void macro_as_identifier(unsigned idx) {
+#define MY_BUFFER buffer
+  int MY_BUFFER[10];
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:20}:"std::array 
MY_BUFFER"
+  MY_BUFFER[idx] = 0;
+#undef MY_BUFFER
+}
+
+void macro_as_size(unsigned idx) {
+#define MY_TEN 10
+  int buffer[MY_TEN];
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:21}:"std::array buffer"
+  buffer[idx] = 0;
+#undef MY_TEN
+}
+
+void constant_as_size(unsigned idx) {
+  const unsigned my_const = 10;
+  int buffer[my_const];
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:23}:"std::array buffer"
+  buffer[idx] = 0;
+}
+
+void subscript_negative() {
+  int buffer[10];
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array 
buffer"
+
+  // For constant-size arrays any negative index will lead to buffer underflow.
+  // std::array::operator[] has unsigned parameter so the value will be casted 
to unsigned.
+  // This will very likely be buffer overflow but hardened std::array catch 
these at runtime.
+  buffer[-5] = 0;
+}
+
+void subscript_signed(int signed_idx) {
+  int buffer[10];
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array 
buffer"
+
+  // For constant-size arrays any negative index will lead to buffer underflow.
+  // std::array::operator[] has unsigned parameter so the value will be casted 
to unsigned.
+  // This will very likely be buffer overflow but hardened std::array catches 
these at runtime.
+  buffer[signed_idx] = 0;
+}

ziqingluo-90 wrote:

maybe we can have a test showing that multi-dimentional arrays will not be 
fixed?

https://github.com/llvm/llvm-project/pull/80084
__

[clang] [-Wunsafe-buffer-usage] Add a new warning for uses of std::span two-parameter constructors (PR #77148)

2024-01-26 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 closed 
https://github.com/llvm/llvm-project/pull/77148
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wcompletion-handler] Fix a non-termination issue (PR #78380)

2024-01-26 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 closed 
https://github.com/llvm/llvm-project/pull/78380
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wcompletion-handler] Fix a non-termination issue (PR #78380)

2024-01-25 Thread Ziqing Luo via cfe-commits

ziqingluo-90 wrote:

@SavchenkoValeriy, sorry for the late response.  
Your suggested fix works for my minimal example as well as my local project 
where this bug was originated.
And I failed to come up with a new counter-example for the fix so I believe it 
will also prevent future non-termination issues. 
I have updated the PR with respect to the simpler fix.

https://github.com/llvm/llvm-project/pull/78380
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wcompletion-handler] Fix a non-termination issue (PR #78380)

2024-01-25 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/78380

>From e7c3a3fc2a4f5d5714044a1c407bfe56f328680d Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Tue, 16 Jan 2024 17:45:01 -0800
Subject: [PATCH 1/4] [-Wcompletion-handler] Fix a non-termination issue

The Called-Once dataflow analysis could never terminate as a
consequence of non-monotonic update on states.  States of kind Escape
can override states leading to non-monotonic update.

The fix uses finite state sets instead of single states to represent
CFG block outputs, which grows in uni-direction.  To preserve the
behavior of the analyzer, a single state can be extracted from a state
set. The extraction follows the idea that Escape can override error
states within one block but obeys to the partial-order for join.

rdar://119671856
---
 clang/lib/Analysis/CalledOnceCheck.cpp | 120 ++---
 clang/test/SemaObjC/warn-called-once.m |  10 +++
 2 files changed, 120 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Analysis/CalledOnceCheck.cpp 
b/clang/lib/Analysis/CalledOnceCheck.cpp
index 04c5f6aa9c7450..3fab93b1d09cdf 100644
--- a/clang/lib/Analysis/CalledOnceCheck.cpp
+++ b/clang/lib/Analysis/CalledOnceCheck.cpp
@@ -163,7 +163,7 @@ class ParameterStatus {
 NotVisited = 0x8, /* 1000 */
 // We already reported a violation and stopped tracking calls for this
 // parameter.
-Reported = 0x15, /*  */
+Reported = 0xF, /*  */
 LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported)
   };
 
@@ -215,6 +215,55 @@ class ParameterStatus {
   const Expr *Call = nullptr;
 };
 
+// Represents a set of `ParameterStatus`s collected in a single CFGBlock.
+class ParamStatusSet {
+private:
+  // The kinds of `States` spans from 0x0 to 0x15, so 16 bits are enough:
+  llvm::BitVector Set{16, false};
+  // Following the exisitng idea: if there are multiple calls of interest in 
one
+  // block, recording one of them is good enough.
+  const Expr *Call = nullptr;
+
+public:
+  ParamStatusSet() {}
+
+  // Add a new `ParameterStatus` to the set.  Returns true iff the adding 
status
+  // was new to the set.
+  bool add(ParameterStatus PS) {
+assert(PS.getKind() != ParameterStatus::Kind::NotVisited &&
+   "the status cannot be NotVisited after visiting a block");
+if (Set.test(PS.getKind()))
+  return false;
+Set.set(PS.getKind());
+if (PS.seenAnyCalls())
+  Call = &PS.getCall();
+return true;
+  }
+
+  // Get one `ParameterStatus` to represent the set of them.  The idea is to
+  // take a join on them but let ESCAPE overrides error statuses.
+  ParameterStatus summaryStatus() {
+unsigned Summary = 0x0;
+
+for (unsigned Idx :
+ llvm::make_range(Set.set_bits_begin(), Set.set_bits_end()))
+  Summary |= Idx;
+assert(Summary == 0x0 || Summary == 0x1 || Summary == 0x3 ||
+   Summary == 0x5 || Summary == 0x7 ||
+   Summary == 0xF && "expecting a defined value");
+
+ParameterStatus::Kind SummaryKind = ParameterStatus::Kind(Summary);
+
+if (SummaryKind > ParameterStatus::Kind::NON_ERROR_STATUS &&
+Set.test(ParameterStatus::Kind::Escaped)) {
+  SummaryKind = ParameterStatus::Kind::Escaped;
+}
+if (ParameterStatus::seenAnyCalls(SummaryKind))
+  return {SummaryKind, Call};
+return {SummaryKind};
+  }
+};
+
 /// State aggregates statuses of all tracked parameters.
 class State {
 public:
@@ -274,6 +323,53 @@ class State {
 
 private:
   ParamSizedVector ParamData;
+
+  friend class CFGBlockOutputState;
+
+  State(ParamSizedVector ParamData) : ParamData(ParamData) {}
+
+  unsigned size() const { return ParamData.size(); }
+};
+
+// A different kind of "state" in addition to `State`.  `CFGBlockOutputState`
+// are created for dealing with the non-termination issue due to `State`s are
+// not being updated monotonically at the output of each CFGBlock.
+// A `CFGBlockOutputState` is in fact a finite set of `State`s that
+// grows monotonically.  One can extract a `State` from a 
`CFGBlockOutputState`.
+// Note that the `State`-extraction  does NOT guarantee monotone but it
+// respects the existing semantics.  Specifically, ESCAPE is "greater than"
+// other error states in a single path but is "less than" them at JOIN.
+//
+// `CFGBlockOutputState` will be used to terminate the fix-point computation.
+class CFGBlockOutputState {
+private:
+  ParamSizedVector StatusSets;
+
+public:
+  CFGBlockOutputState(unsigned Size) : StatusSets{Size} {};
+
+  // Update this `CFGBlockOutputState` with a newly computed `State`.  Return
+  // true iff `CFGBlockOutputState` changed.
+  bool update(const State &NewState) {
+bool Changed = false;
+
+for (unsigned Idx = 0; Idx < NewState.size(); ++Idx) {
+  if (StatusSets[Idx].add(NewState.getStatusFor(Idx))) {
+Changed = true;
+  }
+}
+return Changed;
+  }
+
+  // Return a `State` that represents the `CFGBlockOutputState`.
+  State ge

[clang] [-Wcompletion-handler] Fix a non-termination issue (PR #78380)

2024-01-25 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/78380

>From e7c3a3fc2a4f5d5714044a1c407bfe56f328680d Mon Sep 17 00:00:00 2001
From: Ziqing Luo 
Date: Tue, 16 Jan 2024 17:45:01 -0800
Subject: [PATCH 1/3] [-Wcompletion-handler] Fix a non-termination issue

The Called-Once dataflow analysis could never terminate as a
consequence of non-monotonic update on states.  States of kind Escape
can override states leading to non-monotonic update.

The fix uses finite state sets instead of single states to represent
CFG block outputs, which grows in uni-direction.  To preserve the
behavior of the analyzer, a single state can be extracted from a state
set. The extraction follows the idea that Escape can override error
states within one block but obeys to the partial-order for join.

rdar://119671856
---
 clang/lib/Analysis/CalledOnceCheck.cpp | 120 ++---
 clang/test/SemaObjC/warn-called-once.m |  10 +++
 2 files changed, 120 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Analysis/CalledOnceCheck.cpp 
b/clang/lib/Analysis/CalledOnceCheck.cpp
index 04c5f6aa9c7450..3fab93b1d09cdf 100644
--- a/clang/lib/Analysis/CalledOnceCheck.cpp
+++ b/clang/lib/Analysis/CalledOnceCheck.cpp
@@ -163,7 +163,7 @@ class ParameterStatus {
 NotVisited = 0x8, /* 1000 */
 // We already reported a violation and stopped tracking calls for this
 // parameter.
-Reported = 0x15, /*  */
+Reported = 0xF, /*  */
 LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported)
   };
 
@@ -215,6 +215,55 @@ class ParameterStatus {
   const Expr *Call = nullptr;
 };
 
+// Represents a set of `ParameterStatus`s collected in a single CFGBlock.
+class ParamStatusSet {
+private:
+  // The kinds of `States` spans from 0x0 to 0x15, so 16 bits are enough:
+  llvm::BitVector Set{16, false};
+  // Following the exisitng idea: if there are multiple calls of interest in 
one
+  // block, recording one of them is good enough.
+  const Expr *Call = nullptr;
+
+public:
+  ParamStatusSet() {}
+
+  // Add a new `ParameterStatus` to the set.  Returns true iff the adding 
status
+  // was new to the set.
+  bool add(ParameterStatus PS) {
+assert(PS.getKind() != ParameterStatus::Kind::NotVisited &&
+   "the status cannot be NotVisited after visiting a block");
+if (Set.test(PS.getKind()))
+  return false;
+Set.set(PS.getKind());
+if (PS.seenAnyCalls())
+  Call = &PS.getCall();
+return true;
+  }
+
+  // Get one `ParameterStatus` to represent the set of them.  The idea is to
+  // take a join on them but let ESCAPE overrides error statuses.
+  ParameterStatus summaryStatus() {
+unsigned Summary = 0x0;
+
+for (unsigned Idx :
+ llvm::make_range(Set.set_bits_begin(), Set.set_bits_end()))
+  Summary |= Idx;
+assert(Summary == 0x0 || Summary == 0x1 || Summary == 0x3 ||
+   Summary == 0x5 || Summary == 0x7 ||
+   Summary == 0xF && "expecting a defined value");
+
+ParameterStatus::Kind SummaryKind = ParameterStatus::Kind(Summary);
+
+if (SummaryKind > ParameterStatus::Kind::NON_ERROR_STATUS &&
+Set.test(ParameterStatus::Kind::Escaped)) {
+  SummaryKind = ParameterStatus::Kind::Escaped;
+}
+if (ParameterStatus::seenAnyCalls(SummaryKind))
+  return {SummaryKind, Call};
+return {SummaryKind};
+  }
+};
+
 /// State aggregates statuses of all tracked parameters.
 class State {
 public:
@@ -274,6 +323,53 @@ class State {
 
 private:
   ParamSizedVector ParamData;
+
+  friend class CFGBlockOutputState;
+
+  State(ParamSizedVector ParamData) : ParamData(ParamData) {}
+
+  unsigned size() const { return ParamData.size(); }
+};
+
+// A different kind of "state" in addition to `State`.  `CFGBlockOutputState`
+// are created for dealing with the non-termination issue due to `State`s are
+// not being updated monotonically at the output of each CFGBlock.
+// A `CFGBlockOutputState` is in fact a finite set of `State`s that
+// grows monotonically.  One can extract a `State` from a 
`CFGBlockOutputState`.
+// Note that the `State`-extraction  does NOT guarantee monotone but it
+// respects the existing semantics.  Specifically, ESCAPE is "greater than"
+// other error states in a single path but is "less than" them at JOIN.
+//
+// `CFGBlockOutputState` will be used to terminate the fix-point computation.
+class CFGBlockOutputState {
+private:
+  ParamSizedVector StatusSets;
+
+public:
+  CFGBlockOutputState(unsigned Size) : StatusSets{Size} {};
+
+  // Update this `CFGBlockOutputState` with a newly computed `State`.  Return
+  // true iff `CFGBlockOutputState` changed.
+  bool update(const State &NewState) {
+bool Changed = false;
+
+for (unsigned Idx = 0; Idx < NewState.size(); ++Idx) {
+  if (StatusSets[Idx].add(NewState.getStatusFor(Idx))) {
+Changed = true;
+  }
+}
+return Changed;
+  }
+
+  // Return a `State` that represents the `CFGBlockOutputState`.
+  State ge

[clang] [-Wunsafe-buffer-usage] Fix AST matcher of UUCAddAssignGadget (PR #79392)

2024-01-25 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 approved this pull request.

LGTM,  It's great to catch and fix this bug!

https://github.com/llvm/llvm-project/pull/79392
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [-Wunsafe-buffer-usage] Add a new warning for uses of std::span two-parameter constructors (PR #77148)

2024-01-23 Thread Ziqing Luo via cfe-commits

https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/77148

>From 1b169f49a129a5a69a82386b486c8a46ecfc815a Mon Sep 17 00:00:00 2001
From: ziqingluo-90 
Date: Fri, 5 Jan 2024 13:39:39 -0800
Subject: [PATCH 1/3] [-Wunsafe-buffer-usage] Add a new warning for use of
 two-parameter std::span constructors

Constructing `std::span` objects with the two parameter constructors
could introduce mismatched bounds information, which defeats the
purpose of using `std::span`.  Therefore, we warn every use of such
constructors.

We also plan to incrementally teach `-Wunsafe-buffer-usage` about benign
usages of those constructors.

rdar://115817781
---
 .../Analysis/Analyses/UnsafeBufferUsage.h |   8 +-
 .../Analyses/UnsafeBufferUsageGadgets.def |   8 ++
 .../clang/Basic/DiagnosticSemaKinds.td|   3 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp  |  46 ++
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  15 +-
 ...ffer-usage-in-container-span-construct.cpp | 136 ++
 ...e-buffer-usage-warning-data-invocation.cpp |   2 +-
 7 files changed, 214 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index b28f2c6b99c50e..aca1ad998822c5 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -16,6 +16,7 @@
 
 #include "clang/AST/Decl.h"
 #include "clang/AST/Stmt.h"
+#include "clang/Basic/SourceLocation.h"
 #include "llvm/Support/Debug.h"
 
 namespace clang {
@@ -98,9 +99,14 @@ class UnsafeBufferUsageHandler {
 #endif
 
 public:
-  /// Returns a reference to the `Preprocessor`:
+  /// \return true iff buffer safety is opt-out at `Loc`; false otherwise.
   virtual bool isSafeBufferOptOut(const SourceLocation &Loc) const = 0;
 
+  /// \return true iff unsafe uses in containers should NOT be reported at
+  /// `Loc`; false otherwise.
+  virtual bool
+  ignoreUnsafeBufferInContainer(const SourceLocation &Loc) const = 0;
+
   virtual std::string
   getUnsafeBufferUsageAttributeTextAt(SourceLocation Loc,
   StringRef WSSuffix = "") const = 0;
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index c9766168836510..07f805ebb11013 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -18,6 +18,12 @@
 #define WARNING_GADGET(name) GADGET(name)
 #endif
 
+/// A `WARNING_GADGET` subset, where the code pattern of each gadget
+/// corresponds uses of a (possibly hardened) contatiner (e.g., `std::span`).
+#ifndef WARNING_CONTAINER_GADGET
+#define WARNING_CONTAINER_GADGET(name) WARNING_GADGET(name)
+#endif
+
 /// Safe gadgets correspond to code patterns that aren't unsafe but need to be
 /// properly recognized in order to emit correct warnings and fixes over unsafe
 /// gadgets.
@@ -31,6 +37,7 @@ WARNING_GADGET(ArraySubscript)
 WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 WARNING_GADGET(DataInvocation)
+WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)  // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
 FIXABLE_GADGET(PointerDereference)
@@ -43,4 +50,5 @@ FIXABLE_GADGET(PointerInit)
 
 #undef FIXABLE_GADGET
 #undef WARNING_GADGET
+#undef WARNING_CONTAINER_GADGET
 #undef GADGET
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e54f969c19039d..438c2fcdb1c77b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12075,6 +12075,9 @@ def note_unsafe_buffer_variable_fixit_together : Note<
   "%select{|, and change %2 to safe types to make function %4 bounds-safe}3">;
 def note_safe_buffer_usage_suggestions_disabled : Note<
   "pass -fsafe-buffer-usage-suggestions to receive code hardening 
suggestions">;
+def warn_unsafe_buffer_usage_in_container : Warning<
+  "the two-parameter std::span construction is unsafe as it can introduce 
mismatch between buffer size and the bound information">,
+  InGroup, DefaultIgnore;
 #ifndef NDEBUG
 // Not a user-facing diagnostic. Useful for debugging false negatives in
 // -fsafe-buffer-usage-suggestions (i.e. lack of -Wunsafe-buffer-usage fixits).
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 724c4304a07242..3b6d69cac1afd8 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -232,6 +232,11 @@ AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const 
Unsa

  1   2   >