https://github.com/pjalwadi updated 
https://github.com/llvm/llvm-project/pull/180471

>From 97b0885f0e1b8ad8b82cc148885117942d3d34be Mon Sep 17 00:00:00 2001
From: prajwal jalwadi <[email protected]>
Date: Mon, 9 Feb 2026 09:50:37 +0530
Subject: [PATCH] "[Clang][UnsafeBufferUsage] Warn about two-arg string_view
 constructors" -m "This patch extends the unsafe buffer usage warning to cover
 std::string_view constructors that take a pointer and size, similar to the
 existing check for std::span.

The warning message has been updated to be generic ('container construction' 
instead of 'span construction') and existing tests have been updated to match.

Fixes #166644."
---
 clang/docs/ReleaseNotes.rst                   |   3 +
 .../Analysis/Analyses/UnsafeBufferUsage.h     |   4 +
 .../Analyses/UnsafeBufferUsageGadgets.def     |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   3 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 105 ++++++++++++++++++
 clang/lib/Sema/AnalysisBasedWarnings.cpp      |  22 ++++
 .../warn-unsafe-buffer-usage-string-view.cpp  |  37 ++++++
 7 files changed, 175 insertions(+)
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a1bb1bd2467b7..7567dd0477f3c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -227,6 +227,9 @@ Improvements to Clang's diagnostics
   when accessing a member function on a past-the-end array element.
   (#GH179128)
 
+- ``-Wunsafe-buffer-usage`` now warns about unsafe two-parameter constructors 
of
+  ``std::string_view`` (pointer and size), consistent with the existing 
warning for ``std::span``.
+
 Improvements to Clang's time-trace
 ----------------------------------
 
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 876682ad779d4..bffb45022b8bc 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -129,6 +129,10 @@ class UnsafeBufferUsageHandler {
                                                 bool IsRelatedToDecl,
                                                 ASTContext &Ctx) = 0;
 
+  virtual void handleUnsafeOperationInStringView(const Stmt *Operation,
+                                                 bool IsRelatedToDecl,
+                                                 ASTContext &Ctx) = 0;
+
   /// Invoked when a fix is suggested against a variable. This function groups
   /// all variables that must be fixed together (i.e their types must be 
changed
   /// to the same target type to prevent type mismatches) into a single fixit.
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 129ce95c1c0e0..7ce3c5f0fc7c5 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -42,6 +42,7 @@ WARNING_OPTIONAL_GADGET(ArraySubscript)
 WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall)
 WARNING_OPTIONAL_GADGET(UnsafeFormatAttributedFunctionCall)
 WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
+WARNING_OPTIONAL_GADGET(StringViewTwoParamConstructor)
 FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
 FIXABLE_GADGET(PointerDereference)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f12677ac11600..bbc8ecdbbc3f6 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13549,6 +13549,9 @@ def note_safe_buffer_usage_suggestions_disabled : Note<
 def warn_unsafe_buffer_usage_in_container : Warning<
   "the two-parameter std::span construction is unsafe as it can introduce 
mismatch between buffer size and the bound information">,
   InGroup<UnsafeBufferUsageInContainer>, DefaultIgnore;
+def warn_unsafe_buffer_usage_in_string_view : Warning<
+  "the two-parameter std::string_view construction is unsafe as it can 
introduce mismatch between buffer size and the bound information">,
+  InGroup<UnsafeBufferUsageInContainer>, DefaultIgnore;
 def warn_unsafe_buffer_usage_unique_ptr_array_access : Warning<"direct access 
using operator[] on std::unique_ptr<T[]> is unsafe due to lack of bounds 
checking">,
   InGroup<UnsafeBufferUsageInUniquePtrArrayAccess>, DefaultIgnore;
 def warn_unsafe_buffer_usage_in_static_sized_array : Warning<"direct access on 
T[N] is unsafe due to the lack of bounds checking">,
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 193851cc5f381..bce96eb7c6fc3 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -699,6 +699,45 @@ static bool isSafeSpanTwoParamConstruct(const 
CXXConstructExpr &Node,
   return isPtrBufferSafe(Arg0, Arg1, Ctx);
 }
 
+static bool isSafeStringViewTwoParamConstruct(const CXXConstructExpr &Node,
+                                              ASTContext &Ctx) {
+  const Expr *Arg0 = Node.getArg(0)->IgnoreParenImpCasts();
+  const Expr *Arg1 = Node.getArg(1)->IgnoreParenImpCasts();
+
+  // Pattern 1: String Literals (Safe if size <= length)
+  if (const auto *SL = dyn_cast<StringLiteral>(Arg0)) {
+    if (auto ArgSize = Arg1->getIntegerConstantExpr(Ctx)) {
+      if (ArgSize->getZExtValue() <= SL->getLength())
+        return true;
+    }
+  }
+
+  // Pattern 2: Constant Arrays (Safe if exact match)
+  QualType T0 = Arg0->getType().getCanonicalType();
+  if (const auto *CAT = Ctx.getAsConstantArrayType(T0)) {
+   if (auto ArgSize = Arg1->getIntegerConstantExpr(Ctx)) {
+     // Wrap CAT->getSize() in APSInt to match ArgSize's type
+     if (llvm::APSInt::compareValues(llvm::APSInt(CAT->getSize(), 
/*isUnsigned=*/true), 
+                                    *ArgSize) == 0)
+       return true;
+   }
+ }
+
+  // Pattern 3: Zero length is safe
+  if (auto Val = Arg1->getIntegerConstantExpr(Ctx)) {
+    if (Val->isZero()) return true;
+  }
+
+  // Pattern 4: Pointer/Iterator Pair
+  QualType T1 = Arg1->getType().getCanonicalType();
+  if ((T0->isPointerType() && T1->isPointerType()) ||
+      (T0->isRecordType() && T1->isRecordType())) {
+    return true;
+  }
+
+  return false;
+}
+
 static bool isSafeArraySubscript(const ArraySubscriptExpr &Node,
                                  const ASTContext &Ctx,
                                  const bool IgnoreStaticSizedArrays) {
@@ -1805,6 +1844,70 @@ class SpanTwoParamConstructorGadget : public 
WarningGadget {
   SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
 };
 
+class StringViewTwoParamConstructorGadget : public WarningGadget {
+  static constexpr const char *const StringViewTwoParamConstructorTag =
+      "stringViewTwoParamConstructor";
+  const CXXConstructExpr *Ctor; // the string_view constructor expression
+
+public:
+  StringViewTwoParamConstructorGadget(const MatchResult &Result)
+      : WarningGadget(Kind::StringViewTwoParamConstructor),
+        Ctor(Result.getNodeAs<CXXConstructExpr>(
+            StringViewTwoParamConstructorTag)) {}
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::StringViewTwoParamConstructor;
+  }
+
+  static bool matches(const Stmt *S, ASTContext &Ctx, MatchResult &Result) {
+    const auto *CE = dyn_cast<CXXConstructExpr>(S);
+    if (!CE)
+      return false;
+    const auto *CDecl = CE->getConstructor();
+    const auto *CRecordDecl = CDecl->getParent();
+
+    // MATCH: std::basic_string_view
+    bool IsStringView =
+        CRecordDecl->isInStdNamespace() &&
+        CDecl->getDeclName().getAsString() == "basic_string_view" &&
+        CE->getNumArgs() == 2;
+
+    if (!IsStringView || isSafeStringViewTwoParamConstruct(*CE, Ctx))
+      return false;
+
+    Result.addNode(StringViewTwoParamConstructorTag, 
DynTypedNode::create(*CE));
+    return true;
+  }
+
+  static bool matches(const Stmt *S, ASTContext &Ctx,
+                      const UnsafeBufferUsageHandler *Handler,
+                      MatchResult &Result) {
+    if (ignoreUnsafeBufferInContainer(*S, Handler))
+      return false;
+    return matches(S, Ctx, Result);
+  }
+
+  void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+                             bool IsRelatedToDecl,
+                             ASTContext &Ctx) const override {
+    Handler.handleUnsafeOperationInStringView(Ctor, IsRelatedToDecl, Ctx);
+  }
+
+  SourceLocation getSourceLoc() const override { return Ctor->getBeginLoc(); }
+
+  DeclUseList getClaimedVarUseSites() const override {
+    // If the constructor call is of the form `std::string_view{var, n}`, `var`
+    // is considered an unsafe variable.
+    if (auto *DRE = dyn_cast<DeclRefExpr>(Ctor->getArg(0))) {
+      if (isa<VarDecl>(DRE->getDecl()))
+        return {DRE};
+    }
+    return {};
+  }
+
+  SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
+};
+
 /// A pointer initialization expression of the form:
 ///  \code
 ///  int *p = q;
@@ -2954,6 +3057,8 @@ std::set<const Expr *> clang::findUnsafePointers(const 
FunctionDecl *FD) {
                               const Expr *UnsafeArg = nullptr) override {}
     void handleUnsafeOperationInContainer(const Stmt *, bool,
                                           ASTContext &) override {}
+    void handleUnsafeOperationInStringView(const Stmt *, bool,
+                                           ASTContext &) override {}
     void handleUnsafeVariableGroup(const VarDecl *,
                                    const VariableGroupsManager &, FixItList &&,
                                    const Decl *,
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 4f04bc3999a24..e3193b462f9e1 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2568,6 +2568,28 @@ class UnsafeBufferUsageReporter : public 
UnsafeBufferUsageHandler {
     }
   }
 
+  void handleUnsafeOperationInStringView(const Stmt *Operation,
+                                       bool IsRelatedToDecl,
+                                       ASTContext &Ctx) override {
+  // Extracting location: prioritize the specific location of the constructor
+  SourceLocation Loc = Operation->getBeginLoc();
+  SourceRange Range = Operation->getSourceRange();
+
+  if (const auto *CtorExpr = dyn_cast<CXXConstructExpr>(Operation)) {
+    Loc = CtorExpr->getLocation();
+  }
+
+  // 1. Emit the primary warning for string_view
+  S.Diag(Loc, diag::warn_unsafe_buffer_usage_in_string_view) << Range;
+
+  // 2. If a specific variable is 'blamed', emit the note
+  if (IsRelatedToDecl) {
+    // MsgParam 0 is "unsafe operation"
+    // Range helps the IDE underline the whole expression
+    S.Diag(Loc, diag::note_unsafe_buffer_operation) << 0 << Range;
+  }
+}
+
   void handleUnsafeVariableGroup(const VarDecl *Variable,
                                  const VariableGroupsManager &VarGrpMgr,
                                  FixItList &&Fixes, const Decl *D,
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp
new file mode 100644
index 0000000000000..bf64922afd6e6
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -verify %s
+
+namespace std {
+  typedef __SIZE_TYPE__ size_t;
+
+  template <typename CharT>
+  class basic_string_view {
+  public:
+    basic_string_view(const CharT *s, size_t count);
+    basic_string_view(const CharT *s);
+    template <typename It>
+    basic_string_view(It begin, It end);
+  };
+
+  using string_view = basic_string_view<char>;
+}
+
+void test_string_view_scenarios(const char *ptr, std::size_t n) {
+  // --- UNSAFE CASES ---
+  std::string_view sv1(ptr, n);
+  // expected-warning@-1 {{the two-parameter std::string_view construction is 
unsafe as it can introduce mismatch between buffer size and the bound 
information}}
+
+  // --- SAFE CASES (Should NOT warn for string_view construction) ---
+  std::string_view sv2("hello", 5);     // Safe: String literal match
+  std::string_view sv3(ptr, 0);          // Safe: Zero size
+  std::string_view sv4(ptr);             // Safe: Single parameter
+
+  // Note: 'ptr + 5' triggers general pointer arithmetic warning,
+  // but the string_view constructor itself is ignored by your gadget.
+  std::string_view sv5(ptr, ptr + 5); // expected-warning {{unsafe pointer 
arithmetic}} \
+                                      // expected-note {{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+}
+
+void test_constant_array_safety() {
+  char buffer[10];
+  std::string_view sv6(buffer, 10);      // Safe: Constant array bound match
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to