https://github.com/pjalwadi updated 
https://github.com/llvm/llvm-project/pull/180471

>From 38d83a516c5045e1084fa65505e032146ecddf8d Mon Sep 17 00:00:00 2001
From: prajwal jalwadi <[email protected]>
Date: Mon, 9 Feb 2026 09:50:37 +0530
Subject: [PATCH] [Clang][UnsafeBufferUsage] Warn about two-arg string_view
 constructors

This patch extends the unsafe buffer usage warning to cover
std::string_view constructors that take a pointer and size, similar to the
existing check for std::span.

A dedicated diagnostic, warn_unsafe_buffer_usage_in_string_view, is added to
the existing UnsafeBufferUsageInContainer warning group, and a new test file,
warn-unsafe-buffer-usage-string-view.cpp, covers it.

Fixes #166644.
---
 clang/docs/ReleaseNotes.rst                   |   3 +
 .../Analysis/Analyses/UnsafeBufferUsage.h     |   4 +
 .../Analyses/UnsafeBufferUsageGadgets.def     |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   3 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 127 ++++++++++++++++++
 clang/lib/Sema/AnalysisBasedWarnings.cpp      |  22 +++
 .../warn-unsafe-buffer-usage-string-view.cpp  |  44 ++++++
 7 files changed, 204 insertions(+)
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a1bb1bd2467b7..7567dd0477f3c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -227,6 +227,9 @@ Improvements to Clang's diagnostics
   when accessing a member function on a past-the-end array element.
   (#GH179128)
 
+- ``-Wunsafe-buffer-usage`` now warns about unsafe two-parameter constructors of
+  ``std::string_view`` (pointer and size), consistent with the existing warning for ``std::span``.
+
 Improvements to Clang's time-trace
 ----------------------------------
 
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 876682ad779d4..bffb45022b8bc 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -129,6 +129,10 @@ class UnsafeBufferUsageHandler {
                                                 bool IsRelatedToDecl,
                                                 ASTContext &Ctx) = 0;
 
+  virtual void handleUnsafeOperationInStringView(const Stmt *Operation,
+                                                 bool IsRelatedToDecl,
+                                                 ASTContext &Ctx) = 0;
+
   /// Invoked when a fix is suggested against a variable. This function groups
   /// all variables that must be fixed together (i.e their types must be 
changed
   /// to the same target type to prevent type mismatches) into a single fixit.
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 129ce95c1c0e0..7ce3c5f0fc7c5 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -42,6 +42,7 @@ WARNING_OPTIONAL_GADGET(ArraySubscript)
 WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall)
 WARNING_OPTIONAL_GADGET(UnsafeFormatAttributedFunctionCall)
 WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
+WARNING_OPTIONAL_GADGET(StringViewTwoParamConstructor)
 FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
 FIXABLE_GADGET(PointerDereference)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f12677ac11600..bbc8ecdbbc3f6 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13549,6 +13549,9 @@ def note_safe_buffer_usage_suggestions_disabled : Note<
 def warn_unsafe_buffer_usage_in_container : Warning<
   "the two-parameter std::span construction is unsafe as it can introduce 
mismatch between buffer size and the bound information">,
   InGroup<UnsafeBufferUsageInContainer>, DefaultIgnore;
+def warn_unsafe_buffer_usage_in_string_view : Warning<
+  "the two-parameter std::string_view construction is unsafe as it can 
introduce mismatch between buffer size and the bound information">,
+  InGroup<UnsafeBufferUsageInContainer>, DefaultIgnore;
 def warn_unsafe_buffer_usage_unique_ptr_array_access : Warning<"direct access 
using operator[] on std::unique_ptr<T[]> is unsafe due to lack of bounds 
checking">,
   InGroup<UnsafeBufferUsageInUniquePtrArrayAccess>, DefaultIgnore;
 def warn_unsafe_buffer_usage_in_static_sized_array : Warning<"direct access on 
T[N] is unsafe due to the lack of bounds checking">,
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 193851cc5f381..5c64f6a389942 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -699,6 +699,67 @@ static bool isSafeSpanTwoParamConstruct(const 
CXXConstructExpr &Node,
   return isPtrBufferSafe(Arg0, Arg1, Ctx);
 }
 
+/// Returns true when the two-argument std::basic_string_view construction
+/// \p Node is provably in bounds. The recognized safe forms are:
+///  - (string literal, N) with constant 0 <= N <= literal length;
+///  - (constant-size array, N) with constant 0 <= N <= array size;
+///  - (anything, 0);
+///  - (X.begin(), X.end()) where both calls are made on the same variable.
+/// Every other form is conservatively treated as unsafe.
+///
+/// \param Node a construct expression with at least two arguments.
+/// \param Ctx context used to evaluate the size as a constant expression.
+/// \returns true if \p Node needs no warning.
+static bool isSafeStringViewTwoParamConstruct(const CXXConstructExpr &Node,
+                                              ASTContext &Ctx) {
+  const Expr *Arg0 = Node.getArg(0)->IgnoreParenImpCasts();
+  const Expr *Arg1 = Node.getArg(1)->IgnoreParenImpCasts();
+
+  if (auto Size = Arg1->getIntegerConstantExpr(Ctx)) {
+    // Implicit casts were stripped above, so a literal such as -1 is still
+    // negative here; converted to size_t it would be a huge length.
+    if (Size->isNegative())
+      return false;
+    if (Size->isZero())
+      return true;
+    // A non-zero constant size is safe only when it fits in a buffer whose
+    // extent is known statically.
+    if (const auto *SL = dyn_cast<StringLiteral>(Arg0))
+      return llvm::APSInt::compareValues(
+                 llvm::APSInt::getUnsigned(SL->getLength()), *Size) >= 0;
+    if (const auto *CAT = Ctx.getAsConstantArrayType(Arg0->getType()))
+      return llvm::APSInt::compareValues(
+                 llvm::APSInt(CAT->getSize(), true), *Size) >= 0;
+    return false;
+  }
+
+  // Iterator-pair form. Returns the canonical declaration of the variable on
+  // which the member function called \p Name is invoked, or null if \p E is
+  // not such a call.
+  auto GetReceiverDecl = [](const Expr *E,
+                            llvm::StringRef Name) -> const Decl * {
+    const auto *MCE = dyn_cast<CXXMemberCallExpr>(E);
+    if (!MCE)
+      return nullptr;
+    const CXXMethodDecl *MD = MCE->getMethodDecl();
+    // getName() asserts on non-identifier names (e.g. conversion operators),
+    // which are reachable here because implicit casts have been stripped.
+    if (!MD || !MD->getIdentifier() || MD->getName() != Name)
+      return nullptr;
+    const Expr *Receiver = MCE->getImplicitObjectArgument();
+    if (!Receiver)
+      return nullptr;
+    const auto *DRE = dyn_cast<DeclRefExpr>(Receiver->IgnoreParenImpCasts());
+    return DRE ? DRE->getDecl()->getCanonicalDecl() : nullptr;
+  };
+
+  // Require begin() first and end() second, both on the same variable:
+  // (v.end(), v.begin()) or (v1.begin(), v2.end()) must still warn.
+  const Decl *BeginOwner = GetReceiverDecl(Arg0, "begin");
+  const Decl *EndOwner = GetReceiverDecl(Arg1, "end");
+  return BeginOwner && BeginOwner == EndOwner;
+}
+
 static bool isSafeArraySubscript(const ArraySubscriptExpr &Node,
                                  const ASTContext &Ctx,
                                  const bool IgnoreStaticSizedArrays) {
@@ -1805,6 +1866,70 @@ class SpanTwoParamConstructorGadget : public 
WarningGadget {
   SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
 };
 
+/// Warns on two-argument `std::basic_string_view` constructions, such as
+/// `std::string_view(ptr, n)`, that cannot be proven in-bounds.
+class StringViewTwoParamConstructorGadget : public WarningGadget {
+  static constexpr const char *const StringViewTwoParamConstructorTag =
+      "stringViewTwoParamConstructor";
+  const CXXConstructExpr *Ctor; // the matched string_view construction
+
+public:
+  StringViewTwoParamConstructorGadget(const MatchResult &Result)
+      : WarningGadget(Kind::StringViewTwoParamConstructor),
+        Ctor(Result.getNodeAs<CXXConstructExpr>(
+            StringViewTwoParamConstructorTag)) {}
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::StringViewTwoParamConstructor;
+  }
+
+  /// Binds \p S in \p Result and returns true if it is a two-argument
+  /// construction of a class named `basic_string_view` in namespace std that
+  /// isSafeStringViewTwoParamConstruct() does not accept.
+  static bool matches(const Stmt *S, ASTContext &Ctx, MatchResult &Result) {
+    const auto *Construct = dyn_cast<CXXConstructExpr>(S);
+    if (!Construct || Construct->getNumArgs() != 2)
+      return false;
+    const CXXConstructorDecl *CtorDecl = Construct->getConstructor();
+    if (!CtorDecl->getParent()->isInStdNamespace() ||
+        CtorDecl->getDeclName().getAsString() != "basic_string_view")
+      return false;
+    if (isSafeStringViewTwoParamConstruct(*Construct, Ctx))
+      return false;
+
+    Result.addNode(StringViewTwoParamConstructorTag,
+                   DynTypedNode::create(*Construct));
+    return true;
+  }
+
+  /// Overload that first consults ignoreUnsafeBufferInContainer() for \p S.
+  static bool matches(const Stmt *S, ASTContext &Ctx,
+                      const UnsafeBufferUsageHandler *Handler,
+                      MatchResult &Result) {
+    return !ignoreUnsafeBufferInContainer(*S, Handler) &&
+           matches(S, Ctx, Result);
+  }
+
+  void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+                             bool IsRelatedToDecl,
+                             ASTContext &Ctx) const override {
+    Handler.handleUnsafeOperationInStringView(Ctor, IsRelatedToDecl, Ctx);
+  }
+
+  SourceLocation getSourceLoc() const override { return Ctor->getBeginLoc(); }
+
+  /// Claims `var` when the first constructor argument is directly a
+  /// DeclRefExpr to a VarDecl, i.e. `std::string_view{var, n}`.
+  DeclUseList getClaimedVarUseSites() const override {
+    const auto *DRE = dyn_cast<DeclRefExpr>(Ctor->getArg(0));
+    if (DRE && isa<VarDecl>(DRE->getDecl()))
+      return {DRE};
+    return {};
+  }
+
+  SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
+};
+
 /// A pointer initialization expression of the form:
 ///  \code
 ///  int *p = q;
@@ -2954,6 +3079,8 @@ std::set<const Expr *> clang::findUnsafePointers(const 
FunctionDecl *FD) {
                               const Expr *UnsafeArg = nullptr) override {}
     void handleUnsafeOperationInContainer(const Stmt *, bool,
                                           ASTContext &) override {}
+    void handleUnsafeOperationInStringView(const Stmt *, bool,
+                                           ASTContext &) override {}
     void handleUnsafeVariableGroup(const VarDecl *,
                                    const VariableGroupsManager &, FixItList &&,
                                    const Decl *,
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 4f04bc3999a24..e3193b462f9e1 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2568,6 +2568,28 @@ class UnsafeBufferUsageReporter : public 
UnsafeBufferUsageHandler {
     }
   }
 
+  /// Reports an unsafe two-parameter std::string_view construction at
+  /// \p Operation, plus a note when \p IsRelatedToDecl is set.
+  void handleUnsafeOperationInStringView(const Stmt *Operation,
+                                         bool IsRelatedToDecl,
+                                         ASTContext &Ctx) override {
+    const SourceRange Range = Operation->getSourceRange();
+
+    // Prefer the constructor's own location; fall back to the statement's
+    // beginning if Operation is not a CXXConstructExpr.
+    SourceLocation Loc = Operation->getBeginLoc();
+    if (const auto *CtorExpr = dyn_cast<CXXConstructExpr>(Operation))
+      Loc = CtorExpr->getLocation();
+
+    S.Diag(Loc, diag::warn_unsafe_buffer_usage_in_string_view) << Range;
+
+    // When a specific variable is blamed, also emit the note. The leading 0
+    // is the note's message parameter; Range lets tools underline the whole
+    // expression.
+    if (IsRelatedToDecl)
+      S.Diag(Loc, diag::note_unsafe_buffer_operation) << 0 << Range;
+  }
+
   void handleUnsafeVariableGroup(const VarDecl *Variable,
                                  const VariableGroupsManager &VarGrpMgr,
                                  FixItList &&Fixes, const Decl *D,
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp
new file mode 100644
index 0000000000000..f63986b3f2554
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage-in-container -verify %s
+
+namespace std { // Minimal stand-ins for the library types this test uses.
+  typedef __SIZE_TYPE__ size_t;
+  template <typename T> class basic_string_view {
+  public:
+    basic_string_view(const T *, size_t); // (pointer, size) form
+    template <typename It> basic_string_view(It, It); // (iterator, iterator) form
+  };
+  typedef basic_string_view<char> string_view;
+  typedef basic_string_view<wchar_t> wstring_view;
+  template <typename T> class vector { public: T* begin(); T* end(); };
+} // namespace std
+
+typedef std::size_t size_t;
+
+void test_final_coverage() {
+  std::vector<char> v1, v2;
+
+  // 1. Iterator pairs: begin()/end() of one variable vs. two variables.
+  std::string_view it_ok(v1.begin(), v1.end()); // no-warning
+  // expected-warning@+1 {{the two-parameter std::string_view construction is unsafe}}
+  std::string_view it_bad(v1.begin(), v2.end());
+
+  // 2. String literals of different character types with constant sizes.
+  std::string_view s1("hi", 2); // no-warning
+  // expected-warning@+1 {{the two-parameter std::string_view construction is unsafe}}
+  std::string_view s2("hi", 3);
+
+  std::wstring_view w1(L"hi", 2); // no-warning
+  // expected-warning@+1 {{the two-parameter std::string_view construction is unsafe}}
+  std::wstring_view w2(L"hi", 3);
+
+  // 3. Constant-size arrays with constant sizes.
+  char arr[5];
+  std::string_view a1(arr, 5); // no-warning
+  // expected-warning@+1 {{the two-parameter std::string_view construction is unsafe}}
+  std::string_view a2(arr, 6);
+
+  // 4. Size that is not a constant expression.
+  extern size_t get_size();
+  // expected-warning@+1 {{the two-parameter std::string_view construction is unsafe}}
+  std::string_view d1("hi", get_size());
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to