https://github.com/pjalwadi updated https://github.com/llvm/llvm-project/pull/180471
>From 97b0885f0e1b8ad8b82cc148885117942d3d34be Mon Sep 17 00:00:00 2001 From: prajwal jalwadi <[email protected]> Date: Mon, 9 Feb 2026 09:50:37 +0530 Subject: [PATCH] "[Clang][UnsafeBufferUsage] Warn about two-arg string_view constructors" -m "This patch extends the unsafe buffer usage warning to cover std::string_view constructors that take a pointer and size, similar to the existing check for std::span. The warning message has been updated to be generic ('container construction' instead of 'span construction') and existing tests have been updated to match. Fixes #166644." --- clang/docs/ReleaseNotes.rst | 3 + .../Analysis/Analyses/UnsafeBufferUsage.h | 4 + .../Analyses/UnsafeBufferUsageGadgets.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 105 ++++++++++++++++++ clang/lib/Sema/AnalysisBasedWarnings.cpp | 22 ++++ .../warn-unsafe-buffer-usage-string-view.cpp | 37 ++++++ 7 files changed, 175 insertions(+) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a1bb1bd2467b7..7567dd0477f3c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -227,6 +227,9 @@ Improvements to Clang's diagnostics when accessing a member function on a past-the-end array element. (#GH179128) +- ``-Wunsafe-buffer-usage`` now warns about unsafe two-parameter constructors of + ``std::string_view`` (pointer and size), consistent with the existing warning for ``std::span``. + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 876682ad779d4..bffb45022b8bc 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -129,6 +129,10 @@ class UnsafeBufferUsageHandler { bool IsRelatedToDecl, ASTContext &Ctx) = 0; + virtual void handleUnsafeOperationInStringView(const Stmt *Operation, + bool IsRelatedToDecl, + ASTContext &Ctx) = 0; + /// Invoked when a fix is suggested against a variable. This function groups /// all variables that must be fixed together (i.e their types must be changed /// to the same target type to prevent type mismatches) into a single fixit. diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 129ce95c1c0e0..7ce3c5f0fc7c5 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -42,6 +42,7 @@ WARNING_OPTIONAL_GADGET(ArraySubscript) WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall) WARNING_OPTIONAL_GADGET(UnsafeFormatAttributedFunctionCall) WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` +WARNING_OPTIONAL_GADGET(StringViewTwoParamConstructor) FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) FIXABLE_GADGET(PointerDereference) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f12677ac11600..bbc8ecdbbc3f6 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13549,6 +13549,9 @@ def note_safe_buffer_usage_suggestions_disabled : Note< def warn_unsafe_buffer_usage_in_container : Warning< "the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information">, InGroup<UnsafeBufferUsageInContainer>, DefaultIgnore; +def warn_unsafe_buffer_usage_in_string_view : Warning< + "the two-parameter std::string_view construction is unsafe as it can introduce mismatch between buffer size and the bound information">, + InGroup<UnsafeBufferUsageInContainer>, DefaultIgnore; def warn_unsafe_buffer_usage_unique_ptr_array_access : Warning<"direct access using operator[] on std::unique_ptr<T[]> is unsafe due to lack of bounds checking">, InGroup<UnsafeBufferUsageInUniquePtrArrayAccess>, DefaultIgnore; def warn_unsafe_buffer_usage_in_static_sized_array : Warning<"direct access on T[N] is unsafe due to the lack of bounds checking">, diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 193851cc5f381..bce96eb7c6fc3 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -699,6 +699,45 @@ static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node, return isPtrBufferSafe(Arg0, Arg1, Ctx); } +static bool isSafeStringViewTwoParamConstruct(const CXXConstructExpr &Node, + ASTContext &Ctx) { + const Expr *Arg0 = Node.getArg(0)->IgnoreParenImpCasts(); + const Expr *Arg1 = Node.getArg(1)->IgnoreParenImpCasts(); + + // Pattern 1: String Literals (Safe if size <= length) + if (const auto *SL = dyn_cast<StringLiteral>(Arg0)) { + if (auto ArgSize = Arg1->getIntegerConstantExpr(Ctx)) { + if (ArgSize->getZExtValue() <= SL->getLength()) + return true; + } + } + + // Pattern 2: Constant Arrays (Safe if exact match) + QualType T0 = Arg0->getType().getCanonicalType(); + if (const auto *CAT = Ctx.getAsConstantArrayType(T0)) { + if (auto ArgSize = Arg1->getIntegerConstantExpr(Ctx)) { + // Wrap CAT->getSize() in APSInt to match ArgSize's type + if (llvm::APSInt::compareValues(llvm::APSInt(CAT->getSize(), /*isUnsigned=*/true), + *ArgSize) == 0) + return true; + } + } + + // Pattern 3: Zero length is safe + if (auto Val = Arg1->getIntegerConstantExpr(Ctx)) { + if (Val->isZero()) return true; + } + + // Pattern 4: Pointer/Iterator Pair + QualType T1 = Arg1->getType().getCanonicalType(); + if ((T0->isPointerType() && T1->isPointerType()) || + (T0->isRecordType() && T1->isRecordType())) { + return true; + } + + return false; +} + static bool isSafeArraySubscript(const ArraySubscriptExpr &Node, const ASTContext &Ctx, const bool IgnoreStaticSizedArrays) { @@ -1805,6 +1844,70 @@ class SpanTwoParamConstructorGadget : public WarningGadget { SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; } }; +class StringViewTwoParamConstructorGadget : public WarningGadget { + static constexpr const char *const StringViewTwoParamConstructorTag = + "stringViewTwoParamConstructor"; + const CXXConstructExpr *Ctor; // the string_view constructor expression + +public: + StringViewTwoParamConstructorGadget(const MatchResult &Result) + : WarningGadget(Kind::StringViewTwoParamConstructor), + Ctor(Result.getNodeAs<CXXConstructExpr>( + StringViewTwoParamConstructorTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::StringViewTwoParamConstructor; + } + + static bool matches(const Stmt *S, ASTContext &Ctx, MatchResult &Result) { + const auto *CE = dyn_cast<CXXConstructExpr>(S); + if (!CE) + return false; + const auto *CDecl = CE->getConstructor(); + const auto *CRecordDecl = CDecl->getParent(); + + // MATCH: std::basic_string_view + bool IsStringView = + CRecordDecl->isInStdNamespace() && + CDecl->getDeclName().getAsString() == "basic_string_view" && + CE->getNumArgs() == 2; + + if (!IsStringView || isSafeStringViewTwoParamConstruct(*CE, Ctx)) + return false; + + Result.addNode(StringViewTwoParamConstructorTag, DynTypedNode::create(*CE)); + return true; + } + + static bool matches(const Stmt *S, ASTContext &Ctx, + const UnsafeBufferUsageHandler *Handler, + MatchResult &Result) { + if (ignoreUnsafeBufferInContainer(*S, Handler)) + return false; + return matches(S, Ctx, Result); + } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeOperationInStringView(Ctor, IsRelatedToDecl, Ctx); + } + + SourceLocation getSourceLoc() const override { return Ctor->getBeginLoc(); } + + DeclUseList getClaimedVarUseSites() const override { + // If the constructor call is of the form `std::string_view{var, n}`, `var` + // is considered an unsafe variable. + if (auto *DRE = dyn_cast<DeclRefExpr>(Ctor->getArg(0))) { + if (isa<VarDecl>(DRE->getDecl())) + return {DRE}; + } + return {}; + } + + SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; } +}; + /// A pointer initialization expression of the form: /// \code /// int *p = q; @@ -2954,6 +3057,8 @@ std::set<const Expr *> clang::findUnsafePointers(const FunctionDecl *FD) { const Expr *UnsafeArg = nullptr) override {} void handleUnsafeOperationInContainer(const Stmt *, bool, ASTContext &) override {} + void handleUnsafeOperationInStringView(const Stmt *, bool, + ASTContext &) override {} void handleUnsafeVariableGroup(const VarDecl *, const VariableGroupsManager &, FixItList &&, const Decl *, diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 4f04bc3999a24..e3193b462f9e1 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2568,6 +2568,28 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { } } + void handleUnsafeOperationInStringView(const Stmt *Operation, + bool IsRelatedToDecl, + ASTContext &Ctx) override { + // Extracting location: prioritize the specific location of the constructor + SourceLocation Loc = Operation->getBeginLoc(); + SourceRange Range = Operation->getSourceRange(); + + if (const auto *CtorExpr = dyn_cast<CXXConstructExpr>(Operation)) { + Loc = CtorExpr->getLocation(); + } + + // 1. Emit the primary warning for string_view + S.Diag(Loc, diag::warn_unsafe_buffer_usage_in_string_view) << Range; + + // 2. If a specific variable is 'blamed', emit the note + if (IsRelatedToDecl) { + // MsgParam 0 is "unsafe operation" + // Range helps the IDE underline the whole expression + S.Diag(Loc, diag::note_unsafe_buffer_operation) << 0 << Range; + } +} + void handleUnsafeVariableGroup(const VarDecl *Variable, const VariableGroupsManager &VarGrpMgr, FixItList &&Fixes, const Decl *D, diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp new file mode 100644 index 0000000000000..bf64922afd6e6 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-string-view.cpp @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -verify %s + +namespace std { + typedef __SIZE_TYPE__ size_t; + + template <typename CharT> + class basic_string_view { + public: + basic_string_view(const CharT *s, size_t count); + basic_string_view(const CharT *s); + template <typename It> + basic_string_view(It begin, It end); + }; + + using string_view = basic_string_view<char>; +} + +void test_string_view_scenarios(const char *ptr, std::size_t n) { + // --- UNSAFE CASES --- + std::string_view sv1(ptr, n); + // expected-warning@-1 {{the two-parameter std::string_view construction is unsafe as it can introduce mismatch between buffer size and the bound information}} + + // --- SAFE CASES (Should NOT warn for string_view construction) --- + std::string_view sv2("hello", 5); // Safe: String literal match + std::string_view sv3(ptr, 0); // Safe: Zero size + std::string_view sv4(ptr); // Safe: Single parameter + + // Note: 'ptr + 5' triggers general pointer arithmetic warning, + // but the string_view constructor itself is ignored by your gadget. + std::string_view sv5(ptr, ptr + 5); // expected-warning {{unsafe pointer arithmetic}} \ + // expected-note {{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +} + +void test_constant_array_safety() { + char buffer[10]; + std::string_view sv6(buffer, 10); // Safe: Constant array bound match +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
