================ @@ -0,0 +1,295 @@ +//=== MissingTerminatingZeroChecker.cpp -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Check for string arguments passed to C library functions where the +// terminating zero is missing. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" +#include "llvm/ADT/BitVector.h" +#include <sstream> + +using namespace clang; +using namespace ento; + +namespace { + +struct StringData { + const MemRegion *StrRegion; + int64_t StrLength; + unsigned int Offset; + const llvm::BitVector *NonNullData; +}; + +class MissingTerminatingZeroChecker + : public Checker<check::Bind, check::PreCall> { +public: + void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + + void initOptions(bool NoDefaultIgnore, StringRef IgnoreList); + +private: + const BugType BT{this, "Missing terminating zero"}; + + using IgnoreEntry = std::pair<int, int>; + /// Functions (identified by name only) to ignore. + /// The entry stores a parameter index, or -1. + llvm::StringMap<IgnoreEntry> FunctionsToIgnore = { + {"stpncpy", {1, -1}}, {"strncat", {1, -1}}, {"strncmp", {0, 1}}, + {"strncpy", {1, -1}}, {"strndup", {0, -1}}, {"strnlen", {0, -1}}, + }; + + bool checkArg(unsigned int ArgI, CheckerContext &C, + const CallEvent &Call) const; + bool getStringData(StringData &DataOut, ProgramStateRef State, + SValBuilder &SVB, const MemRegion *StrReg) const; + ProgramStateRef setStringData(ProgramStateRef State, Loc L, + const llvm::BitVector &NonNullData) const; + void reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C, + const char Msg[]) const; +}; + +} // namespace + +namespace llvm { +template <> struct FoldingSetTrait<llvm::BitVector> { + static inline void Profile(llvm::BitVector X, FoldingSetNodeID &ID) { + ID.AddInteger(X.size()); + for (unsigned int I = 0; I < X.size(); ++I) + ID.AddBoolean(X[I]); + } +}; +} // end namespace llvm + +// Contains for a "string" (character array) region if elements are known to be +// non-zero. The bit vector is indexed by the array element index and is true +// if the element is known to be non-zero. Size of the vector does not +// correspond to the extent of the memory region (can be smaller), the missing +// elements are considered to be false. +// (A value of 'false' means that the string element is zero or unknown.) +REGISTER_MAP_WITH_PROGRAMSTATE(NonNullnessData, const MemRegion *, + llvm::BitVector) ---------------- NagyDonat wrote:
> There is one Achilles heel of the RegionStore. Copying an object uses LCVs, > which can only be bound as a default binding, which doesn't have an extent. > It's correct that LCVs are default bindings. The problem is that default > bindings doesn't have an extent, aka. end/length. It would be _really_ nice to eliminate this limitation... > There is a lengthy discussion of the proposal that we at Sonar believe would > work, and we actually have a really early draft prototype of what I described > there. Unfortunately, we never got back to finishing it. Is this prototype available for public review? I would definitely like to have a look at it to understand the outline and challenges of this task. (Depending on the details, I may consider working on this goal -- but obviously I wouldn't want to start that without checking the prior work and ensuring alignment with your efforts.) > > > Or would it be better to "get" (or create?) an element region for all > > > elements of the string when for example a `strlen` is called? > > > > > > Yes, this is the standard approach, so it would be better (unless there are > > unavoidable issues with it). Obviously we must introduce a practical limit > > (32? 128? 256?) for the number of checked elements -- to avoid performance > > issues -- but otherwise this should work. > > I'd be very careful of doing this. Asking for an ElementRegion will > inherently allocate, and do hashtable lookups (to insert it). Agreed. I'd say that some very small limit (e.g. checking 8 characters) could be acceptable without performance issues (e.g. to catch low-level manipulation of short strings), but you are right that introducing many element regions would be unacceptable from a performance POV. > And how often is it the case that a string is made of individual byte > manipulation instead of using a string literal (string literal region) or > concatenation function (likely some conjured) or an array initializer > expression (these are represented by CompoundVals). My guess is that it's > fairly uncommon, but prove me wrong with data. This is also a good point. I suspect that some individual byte manipulation _does_ happen when the code constructs very short strings (or constructs strings in a loop where it's possible that only few iterations happen), but it shouldn't be too common. I don't think that this "check whether the individual characters are zero" logic (implemented in e.g. https://github.com/llvm/llvm-project/pull/149106 ) is _completely_ useless, but it would be a fairly niche checker. https://github.com/llvm/llvm-project/pull/146664 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
