================
@@ -0,0 +1,295 @@
+//=== MissingTerminatingZeroChecker.cpp -------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Check for string arguments passed to C library functions where the
+// terminating zero is missing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "llvm/ADT/BitVector.h"
+#include <sstream>
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+struct StringData {
+  const MemRegion *StrRegion;
+  int64_t StrLength;
+  unsigned int Offset;
+  const llvm::BitVector *NonNullData;
+};
+
+class MissingTerminatingZeroChecker
+    : public Checker<check::Bind, check::PreCall> {
+public:
+  void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const;
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+
+  void initOptions(bool NoDefaultIgnore, StringRef IgnoreList);
+
+private:
+  const BugType BT{this, "Missing terminating zero"};
+
+  using IgnoreEntry = std::pair<int, int>;
+  /// Functions (identified by name only) to ignore.
+  /// The entry stores a parameter index, or -1.
+  llvm::StringMap<IgnoreEntry> FunctionsToIgnore = {
+      {"stpncpy", {1, -1}}, {"strncat", {1, -1}}, {"strncmp", {0, 1}},
+      {"strncpy", {1, -1}}, {"strndup", {0, -1}}, {"strnlen", {0, -1}},
+  };
+
+  bool checkArg(unsigned int ArgI, CheckerContext &C,
+                const CallEvent &Call) const;
+  bool getStringData(StringData &DataOut, ProgramStateRef State,
+                     SValBuilder &SVB, const MemRegion *StrReg) const;
+  ProgramStateRef setStringData(ProgramStateRef State, Loc L,
+                                const llvm::BitVector &NonNullData) const;
+  void reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C,
+                 const char Msg[]) const;
+};
+
+} // namespace
+
+namespace llvm {
+template <> struct FoldingSetTrait<llvm::BitVector> {
+  static inline void Profile(llvm::BitVector X, FoldingSetNodeID &ID) {
+    ID.AddInteger(X.size());
+    for (unsigned int I = 0; I < X.size(); ++I)
+      ID.AddBoolean(X[I]);
+  }
+};
+} // end namespace llvm
+
+// Contains for a "string" (character array) region if elements are known to be
+// non-zero. The bit vector is indexed by the array element index and is true
+// if the element is known to be non-zero. Size of the vector does not
+// correspond to the extent of the memory region (can be smaller), the missing
+// elements are considered to be false.
+// (A value of 'false' means that the string element is zero or unknown.)
+REGISTER_MAP_WITH_PROGRAMSTATE(NonNullnessData, const MemRegion *,
+                               llvm::BitVector)
----------------
NagyDonat wrote:

> There is one Achilles heel of the RegionStore. Copying an object uses LCVs, 
> which can only be bound as a default binding, which doesn't have an extent. 
> It's correct that LCVs are default bindings. The problem is that default 
> bindings doesn't have an extent, aka. end/length.

It would be _really_ nice to eliminate this limitation...

> There is a lengthy discussion of the proposal that we at Sonar believe would 
> work, and we actually have a really early draft prototype of what I described 
> there. Unfortunately, we never got back to finishing it.

Is this prototype available for public review? I would definitely like to have 
a look at it to understand the outline and challenges of this task. (Depending 
on the details, I may consider working on this goal -- but obviously I wouldn't 
want to start that without checking the prior work and ensuring alignment with 
your efforts.)

> > > Or would it be better to "get" (or create?) an element region for all 
> > > elements of the string when for example a `strlen` is called?
> > 
> > 
> > Yes, this is the standard approach, so it would be better (unless there are 
> > unavoidable issues with it). Obviously we must introduce a practical limit 
> > (32? 128? 256?) for the number of checked elements -- to avoid performance 
> > issues -- but otherwise this should work.
> 
> I'd be very careful of doing this. Asking for an ElementRegion will 
> inherently allocate, and do hashtable lookups (to insert it).

Agreed. I'd say that some very small limit (e.g. checking 8 characters) could 
be acceptable without performance issues (e.g. to catch low-level manipulation 
of short strings), but you are right that introducing many element regions 
would be unacceptable from a performance POV.

> And how often is it the case that a string is made of individual byte 
> manipulation instead of using a string literal (string literal region) or 
> concatenation function (likely some conjured) or an array initializer 
> expression (these are represented by CompoundVals). My guess is that it's 
> fairly uncommon, but prove me wrong with data.

This is also a good point. I suspect that some individual byte manipulation 
_does_ happen when the code constructs very short strings (or constructs 
strings in a loop where it's possible that only few iterations happen), but it 
shouldn't be too common.

I don't think that this "check whether the individual characters are zero" 
logic (implemented in e.g. https://github.com/llvm/llvm-project/pull/149106 ) 
is _completely_ useless, but it would be a fairly niche checker.

https://github.com/llvm/llvm-project/pull/146664
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to