llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-analysis Author: Ziqing Luo (ziqingluo-90) <details> <summary>Changes</summary> The character buffer passed to a "%.*s" specifier may be safely bound if the precision is properly specified, even if the buffer does not guarantee null-termination. For example, ``` void f(std::span<char> span) { printf("%.*s", (int)span.size(), span.data()); // "span.data()" does not guarantee null-termination but is safely bound by "span.size()", so this call is safe } ``` rdar://154072130 --- Full diff: https://github.com/llvm/llvm-project/pull/145862.diff 2 Files Affected: - (modified) clang/lib/Analysis/UnsafeBufferUsage.cpp (+176-116) - (modified) clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp (+39) ``````````diff diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 6048169b56640..aa2d0e3172b6d 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -25,6 +25,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallSet.h" @@ -453,22 +454,108 @@ static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx) { } } +// Providing that `Ptr` is a pointer and `Size` is an unsigned-integral +// expression, returns true iff they follow one of the following safe +// patterns: +// 1. Ptr is `DRE.data()` and Size is `DRE.size()`, where DRE is a hardened +// container or view; +// +// 2. Ptr is `a` and Size is `n`, where `a` is of an array-of-T with constant +// size `n`; +// +// 3. Ptr is `&var` and Size is `1`; or +// Ptr is `std::addressof(...)` and Size is `1`; +// +// 4. Size is `0`; +static bool isPtrBufferSafe(const Expr *Ptr, const Expr *Size, + ASTContext &Ctx) { + // Pattern 1: + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) + if (auto *MCESize = + dyn_cast<CXXMemberCallExpr>(Size->IgnoreParenImpCasts())) { + auto *DREOfPtr = dyn_cast<DeclRefExpr>( + MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts()); + auto *DREOfSize = dyn_cast<DeclRefExpr>( + MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts()); + + if (!DREOfPtr || !DREOfSize) + return false; // not in safe pattern + if (DREOfPtr->getDecl() != DREOfSize->getDecl()) + return false; + if (MCEPtr->getMethodDecl()->getName() != "data") + return false; + // `MCEPtr->getRecordDecl()` must be non-null as `DREOfPtr` is non-null: + if (!MCEPtr->getRecordDecl()->isInStdNamespace()) + return false; + + auto *ObjII = MCEPtr->getRecordDecl()->getIdentifier(); + + if (!ObjII) + return false; + + bool AcceptSizeBytes = Ptr->getType()->getPointeeType()->isCharType(); + + if (!((AcceptSizeBytes && + MCESize->getMethodDecl()->getName() == "size_bytes") || + // Note here the pointer must be a pointer-to-char type unless there + // is explicit casting. If there is explicit casting, this branch + // is unreachable. Thus, at this branch "size" and "size_bytes" are + // equivalent as the pointer is a char pointer: + MCESize->getMethodDecl()->getName() == "size")) + return false; + + return llvm::is_contained({SIZED_CONTAINER_OR_VIEW_LIST}, + ObjII->getName()); + } + + Expr::EvalResult ER; + + // Pattern 2-4: + if (Size->EvaluateAsInt(ER, Ctx)) { + // Pattern 2: + if (auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) { + if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) { + llvm::APSInt SizeInt = ER.Val.getInt(); + + return llvm::APSInt::compareValues( + SizeInt, llvm::APSInt(CAT->getSize(), true)) == 0; + } + return false; + } + + // Pattern 3: + if (ER.Val.getInt().isOne()) { + if (auto *UO = dyn_cast<UnaryOperator>(Ptr->IgnoreParenImpCasts())) + return UO && UO->getOpcode() == UnaryOperator::Opcode::UO_AddrOf; + if (auto *CE = dyn_cast<CallExpr>(Ptr->IgnoreParenImpCasts())) { + auto *FnDecl = CE->getDirectCallee(); + + return FnDecl && FnDecl->getNameAsString() == "addressof" && + FnDecl->isInStdNamespace(); + } + return false; + } + // Pattern 4: + if (ER.Val.getInt().isZero()) + return true; + } + return false; +} + // Given a two-param std::span construct call, matches iff the call has the // following forms: // 1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE // 2. `std::span<T>{new T, 1}` -// 3. `std::span<T>{&var, 1}` or `std::span<T>{std::addressof(...), 1}` -// 4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size -// `n` -// 5. `std::span<T>{any, 0}` -// 6. `std::span<T>{ (char *)f(args), args[N] * arg*[M]}`, where +// 3. `std::span<T>{ (char *)f(args), args[N] * arg*[M]}`, where // `f` is a function with attribute `alloc_size(N, M)`; // `args` represents the list of arguments; // `N, M` are parameter indexes to the allocating element number and size. // Sometimes, there is only one parameter index representing the total // size. -// 7. `std::span<T>{x.begin(), x.end()}` where `x` is an object in the +// 4. `std::span<T>{x.begin(), x.end()}` where `x` is an object in the // SIZED_CONTAINER_OR_VIEW_LIST. +// 5. `isPtrBufferSafe` returns true of the two arguments of the span +// constructor static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node, ASTContext &Ctx) { assert(Node.getNumArgs() == 2 && @@ -495,7 +582,7 @@ static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node, // Check form 5: return true; - // Check forms 1-3: + // Check forms 1-2: switch (Arg0->getStmtClass()) { case Stmt::CXXNewExprClass: if (auto Size = cast<CXXNewExpr>(Arg0)->getArraySize()) { @@ -509,35 +596,11 @@ static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node, return Arg1CV && Arg1CV->isOne(); } break; - case Stmt::UnaryOperatorClass: - if (cast<UnaryOperator>(Arg0)->getOpcode() == - UnaryOperator::Opcode::UO_AddrOf) - // Check form 3: - return Arg1CV && Arg1CV->isOne(); - break; - case Stmt::CallExprClass: - // Check form 3: - if (const auto *CE = dyn_cast<CallExpr>(Arg0)) { - const auto FnDecl = CE->getDirectCallee(); - if (FnDecl && FnDecl->getNameAsString() == "addressof" && - FnDecl->isInStdNamespace()) { - return Arg1CV && Arg1CV->isOne(); - } - } - break; default: break; } - QualType Arg0Ty = Arg0->IgnoreImplicit()->getType(); - - if (auto *ConstArrTy = Ctx.getAsConstantArrayType(Arg0Ty)) { - const llvm::APSInt ConstArrSize = llvm::APSInt(ConstArrTy->getSize()); - - // Check form 4: - return Arg1CV && llvm::APSInt::compareValues(ConstArrSize, *Arg1CV) == 0; - } - // Check form 6: + // Check form 3: if (auto CCast = dyn_cast<CStyleCastExpr>(Arg0)) { if (!CCast->getType()->isPointerType()) return false; @@ -566,7 +629,7 @@ static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node, } } } - // Check form 7: + // Check form 4: auto IsMethodCallToSizedObject = [](const Stmt *Node, StringRef MethodName) { if (const auto *MC = dyn_cast<CXXMemberCallExpr>(Node)) { const auto *MD = MC->getMethodDecl(); @@ -592,7 +655,9 @@ static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node, cast<CXXMemberCallExpr>(Arg1) ->getImplicitObjectArgument() ->IgnoreParenImpCasts()); - return false; + + // Check 5: + return isPtrBufferSafe(Arg0, Arg1, Ctx); } static bool isSafeArraySubscript(const ArraySubscriptExpr &Node, @@ -743,28 +808,81 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, const CallExpr *Call; unsigned FmtArgIdx; const Expr *&UnsafeArg; + ASTContext &Ctx; + + // Returns an `Expr` representing the precision if specified, null + // otherwise: + const Expr * + getPrecisionAsExpr(const analyze_printf::OptionalAmount &Precision, + const CallExpr *Call) { + unsigned PArgIdx = -1; + + if (Precision.hasDataArgument()) + PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx; + if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) { + const Expr *PArg = Call->getArg(PArgIdx); + + // Strip the cast if `PArg` is a cast-to-int expression: + if (auto *CE = dyn_cast<CastExpr>(PArg); + CE && CE->getType()->isSignedIntegerType()) + PArg = CE->getSubExpr(); + return PArg; + } + if (Precision.getHowSpecified() == + analyze_printf::OptionalAmount::HowSpecified::Constant) { + auto SizeTy = Ctx.getSizeType(); + llvm::APSInt PArgVal = llvm::APSInt( + llvm::APInt(Ctx.getTypeSize(SizeTy), Precision.getConstantAmount()), + true); + + return IntegerLiteral::Create(Ctx, PArgVal, Ctx.getSizeType(), {}); + } + return nullptr; + } public: StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx, - const Expr *&UnsafeArg) - : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {} + const Expr *&UnsafeArg, ASTContext &Ctx) + : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg), Ctx(Ctx) {} bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, unsigned specifierLen, const TargetInfo &Target) override { - if (FS.getConversionSpecifier().getKind() == - analyze_printf::PrintfConversionSpecifier::sArg) { - unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; - - if (0 < ArgIdx && ArgIdx < Call->getNumArgs()) - if (!isNullTermPointer(Call->getArg(ArgIdx))) { - UnsafeArg = Call->getArg(ArgIdx); // output - // returning false stops parsing immediately - return false; - } - } - return true; // continue parsing + if (FS.getConversionSpecifier().getKind() != + analyze_printf::PrintfConversionSpecifier::sArg) + return true; // continue parsing + + unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; + + if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs())) + // If the `ArgIdx` is invalid, give up. + return true; // continue parsing + + const Expr *Arg = Call->getArg(ArgIdx); + + if (isNullTermPointer(Arg)) + // If Arg is a null-terminated pointer, it is safe anyway. + return true; // continue parsing + + // Otherwise, check if the specifier has a precision and if the character + // pointer is safely bound by the precision: + auto LengthModifier = FS.getLengthModifier(); + QualType ArgType = Arg->getType(); + bool IsArgTypeValid = // Is ArgType a character pointer type? + ArgType->isPointerType() && + (LengthModifier.getKind() == LengthModifier.AsWideChar + ? ArgType->getPointeeType()->isWideCharType() + : ArgType->getPointeeType()->isCharType()); + + ArgType.dump(); + if (auto *Precision = getPrecisionAsExpr(FS.getPrecision(), Call); + Precision && IsArgTypeValid) + if (isPtrBufferSafe(Arg, Precision, Ctx)) + return true; + // Handle unsafe case: + UnsafeArg = Call->getArg(ArgIdx); // output + return false; // returning false stops parsing immediately } }; @@ -780,7 +898,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, else goto CHECK_UNSAFE_PTR; - StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg); + StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx); return analyze_format_string::ParsePrintfString( Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(), @@ -1052,24 +1170,11 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx, return false; } -// This matcher requires that it is known that the callee `isNormalPrintf`. -// Then it matches if the first two arguments of the call is a pointer and an -// integer and they are not in a safe pattern. -// -// For the first two arguments: `ptr` and `size`, they are safe if in the -// following patterns: -// -// Pattern 1: -// ptr := DRE.data(); -// size:= DRE.size()/DRE.size_bytes() -// And DRE is a hardened container or view. -// -// Pattern 2: -// ptr := Constant-Array-DRE; -// size:= any expression that has compile-time constant value equivalent to -// sizeof (Constant-Array-DRE) -static bool hasUnsafeSnprintfBuffer(const CallExpr &Node, - const ASTContext &Ctx) { +// This function requires that it is known that the callee `isNormalPrintf`. +// It returns true iff the first two arguments of the call is a pointer +// `Ptr` and an unsigned integer `Size` and they are NOT safe, i.e., +// `!isPtrBufferSafe(Ptr, Size)`. +static bool hasUnsafeSnprintfBuffer(const CallExpr &Node, ASTContext &Ctx) { const FunctionDecl *FD = Node.getDirectCallee(); assert(FD && "It should have been checked that FD is non-null."); @@ -1085,57 +1190,12 @@ static bool hasUnsafeSnprintfBuffer(const CallExpr &Node, QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType(); const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); - if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() || - !Size->getType()->isIntegerType()) + if (FirstPteTy.isConstQualified() || !FirstPteTy->isAnyCharacterType() || + !Buf->getType()->isPointerType() || + !Size->getType()->isUnsignedIntegerType()) return false; // not an snprintf call - // Pattern 1: - static StringRef SizedObjs[] = {SIZED_CONTAINER_OR_VIEW_LIST}; - Buf = Buf->IgnoreParenImpCasts(); - Size = Size->IgnoreParenImpCasts(); - if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Buf)) - if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { - auto *DREOfPtr = dyn_cast<DeclRefExpr>( - MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts()); - auto *DREOfSize = dyn_cast<DeclRefExpr>( - MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts()); - - if (!DREOfPtr || !DREOfSize) - return true; // not in safe pattern - if (DREOfPtr->getDecl() != DREOfSize->getDecl()) - return true; // not in safe pattern - if (MCEPtr->getMethodDecl()->getName() != "data") - return true; // not in safe pattern - - if (MCESize->getMethodDecl()->getName() == "size_bytes" || - // Note here the pointer must be a pointer-to-char type unless there - // is explicit casting. If there is explicit casting, this branch - // is unreachable. Thus, at this branch "size" and "size_bytes" are - // equivalent as the pointer is a char pointer: - MCESize->getMethodDecl()->getName() == "size") - for (StringRef SizedObj : SizedObjs) - if (MCEPtr->getRecordDecl()->isInStdNamespace() && - MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() == - SizedObj) - return false; // It is in fact safe - } - - // Pattern 2: - if (auto *DRE = dyn_cast<DeclRefExpr>(Buf->IgnoreParenImpCasts())) { - if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) { - Expr::EvalResult ER; - // The array element type must be compatible with `char` otherwise an - // explicit cast will be needed, which will make this check unreachable. - // Therefore, the array extent is same as its' bytewise size. - if (Size->EvaluateAsInt(ER, Ctx)) { - llvm::APSInt EVal = ER.Val.getInt(); // Size must have integer type - - return llvm::APSInt::compareValues( - EVal, llvm::APSInt(CAT->getSize(), true)) != 0; - } - } - } - return true; // ptr and size are not in safe pattern + return !isPtrBufferSafe(Buf, Size, Ctx); } } // namespace libc_func_matchers diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index a7c19bcac1607..6ae329a736d91 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -47,11 +47,24 @@ namespace std { T *c_str(); T *data(); unsigned size_bytes(); + unsigned size(); }; typedef basic_string<char> string; typedef basic_string<wchar_t> wstring; + template<typename T> + struct basic_string_view { + T *c_str() const noexcept; + T *data() const noexcept; + unsigned size(); + const T* begin() const noexcept; + const T* end() const noexcept; + }; + + typedef basic_string_view<char> string_view; + typedef basic_string_view<wchar_t> wstring_view; + // C function under std: void memcpy(); void strcpy(); @@ -125,10 +138,36 @@ void safe_examples(std::string s1, int *p) { fprintf((FILE*)0, s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn char a[10]; + char c = 'c'; snprintf(a, sizeof a, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn snprintf(a, sizeof(decltype(a)), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn snprintf(a, 10, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn + snprintf(&c, 1, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn + snprintf(nullptr, 0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn +} + +void test_sarg_precision(std::string Str, std::string_view Sv, std::wstring_view WSv, + std::span<char> SpC, std::span<int> SpI) { + printf("%.*s"); + printf("%.*s", (int)Str.size(), Str.data()); + printf("%.*s", (int)Str.size_bytes(), Str.data()); + printf("%.*s", (int)Sv.size(), Sv.data()); + printf("%.*s", (int)SpC.size(), SpC.data()); + printf("%.*s", SpC.size(), SpC.data()); + printf("%.*ls", WSv.size(), WSv.data()); + printf("%.*s", SpC.data()); // no warn because `SpC.data()` is passed to the precision while the actually string pointer is not given + + printf("%.*s", SpI.size(), SpI.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + printf("%.*s", SpI.size(), SpC.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + printf("%.*s", WSv.size(), WSv.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + + char a[10]; + int b[10]; + + printf("%.10s", a); + printf("%.11s", a); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + printf("%.10s", b); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} } `````````` </details> https://github.com/llvm/llvm-project/pull/145862 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits