NoQ updated this revision to Diff 65369.
NoQ marked 4 inline comments as done.

https://reviews.llvm.org/D20811

Files:
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
  test/Analysis/std-library-functions.c
  test/Analysis/std-library-functions.cpp

Index: test/Analysis/std-library-functions.cpp
===================================================================
--- /dev/null
+++ test/Analysis/std-library-functions.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdLibraryFunctions,debug.ExprInspection -verify %s
+
+// Test that we don't model functions with broken prototypes.
+// Because they probably work differently as well.
+//
+// This test lives in a separate file because we wanted to test all functions
+// in the .c file, however in C there are no overloads.
+
+void clang_analyzer_eval(bool);
+bool isalpha(char);
+
+void test() {
+  clang_analyzer_eval(isalpha('A')); // no-crash // expected-warning{{UNKNOWN}}
+}
Index: test/Analysis/std-library-functions.c
===================================================================
--- /dev/null
+++ test/Analysis/std-library-functions.c
@@ -0,0 +1,184 @@
+// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdLibraryFunctions,debug.ExprInspection -verify %s
+
+void clang_analyzer_eval(int);
+
+int glob;
+
+typedef struct FILE FILE;
+#define EOF -1
+
+int getc(FILE *);
+void test_getc(FILE *fp) {
+  int x;
+  while ((x = getc(fp)) != EOF) {
+    clang_analyzer_eval(x > 255); // expected-warning{{FALSE}}
+    clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}}
+  }
+}
+
+int fgetc(FILE *);
+void test_fgets(FILE *fp) {
+  clang_analyzer_eval(fgetc(fp) < 256); // expected-warning{{TRUE}}
+  clang_analyzer_eval(fgetc(fp) >= 0); // expected-warning{{UNKNOWN}}
+}
+
+
+typedef unsigned long size_t;
+typedef signed long ssize_t;
+ssize_t read(int, void *, size_t);
+ssize_t write(int, const void *, size_t);
+void test_read_write(int fd, char *buf) {
+  glob = 1;
+  ssize_t x = write(fd, buf, 10);
+  clang_analyzer_eval(glob); // expected-warning{{UNKNOWN}}
+  if (x >= 0) {
+    clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}}
+    ssize_t y = read(fd, &glob, sizeof(glob));
+    if (y >= 0) {
+      clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{TRUE}}
+    } else {
+      // -1 overflows on promotion!
+      clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{FALSE}}
+    }
+  } else {
+    clang_analyzer_eval(x == -1); // expected-warning{{TRUE}}
+  }
+}
+
+size_t fread(void *, size_t, size_t, FILE *);
+size_t fwrite(const void *restrict, size_t, size_t, FILE *restrict);
+void test_fread_fwrite(FILE *fp, int *buf) {
+  size_t x = fwrite(buf, sizeof(int), 10, fp);
+  clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}}
+  size_t y = fread(buf, sizeof(int), 10, fp);
+  clang_analyzer_eval(y <= 10); // expected-warning{{TRUE}}
+  size_t z = fwrite(buf, sizeof(int), y, fp);
+  // FIXME: should be TRUE once symbol-symbol constraint support is improved.
+  clang_analyzer_eval(z <= y); // expected-warning{{UNKNOWN}}
+}
+
+ssize_t getline(char **, size_t *, FILE *);
+void test_getline(FILE *fp) {
+  char *line = 0;
+  size_t n = 0;
+  ssize_t len;
+  while ((len = getline(&line, &n, fp)) != -1) {
+    clang_analyzer_eval(len == 0); // expected-warning{{FALSE}}
+  }
+}
+
+int isascii(int);
+void test_isascii(int x) {
+  clang_analyzer_eval(isascii(123)); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isascii(-1)); // expected-warning{{FALSE}}
+  if (isascii(x)) {
+    clang_analyzer_eval(x < 128); // expected-warning{{TRUE}}
+    clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}}
+  } else {
+    if (x > 42)
+      clang_analyzer_eval(x >= 128); // expected-warning{{TRUE}}
+    else
+      clang_analyzer_eval(x < 0); // expected-warning{{TRUE}}
+  }
+  glob = 1;
+  isascii('a');
+  clang_analyzer_eval(glob); // expected-warning{{TRUE}}
+}
+
+int islower(int);
+void test_islower(int x) {
+  clang_analyzer_eval(islower('x')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(islower('X')); // expected-warning{{FALSE}}
+  if (islower(x))
+    clang_analyzer_eval(x < 'a'); // expected-warning{{FALSE}}
+}
+
+int getchar(void);
+void test_getchar() {
+  int x = getchar();
+  if (x == EOF)
+    return;
+  clang_analyzer_eval(x < 0); // expected-warning{{FALSE}}
+  clang_analyzer_eval(x < 256); // expected-warning{{TRUE}}
+}
+
+int isalpha(int);
+void test_isalpha() {
+  clang_analyzer_eval(isalpha(']')); // expected-warning{{FALSE}}
+  clang_analyzer_eval(isalpha('Q')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isalpha(128)); // expected-warning{{UNKNOWN}}
+}
+
+int isalnum(int);
+void test_alnum() {
+  clang_analyzer_eval(isalnum('1')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isalnum(')')); // expected-warning{{FALSE}}
+}
+
+int isblank(int);
+void test_isblank() {
+  clang_analyzer_eval(isblank('\t')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isblank(' ')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(isblank('\n')); // expected-warning{{FALSE}}
+}
+
+int ispunct(int);
+void test_ispunct(int x) {
+  clang_analyzer_eval(ispunct(' ')); // expected-warning{{FALSE}}
+  clang_analyzer_eval(ispunct(-1)); // expected-warning{{FALSE}}
+  clang_analyzer_eval(ispunct('#')); // expected-warning{{TRUE}}
+  clang_analyzer_eval(ispunct('_')); // expected-warning{{TRUE}}
+  if (ispunct(x))
+    clang_analyzer_eval(x < 127); // expected-warning{{TRUE}}
+}
+
+int isupper(int);
+void test_isupper(int x) {
+  if (isupper(x))
+    clang_analyzer_eval(x < 'A'); // expected-warning{{FALSE}}
+}
+
+int isgraph(int);
+int isprint(int);
+void test_isgraph_isprint(int x) {
+  char y = x;
+  if (isgraph(y))
+    clang_analyzer_eval(isprint(x)); // expected-warning{{TRUE}}
+}
+
+int isdigit(int);
+void test_mixed_branches(int x) {
+  if (isdigit(x)) {
+    clang_analyzer_eval(isgraph(x)); // expected-warning{{TRUE}}
+    clang_analyzer_eval(isblank(x)); // expected-warning{{FALSE}}
+  } else if (isascii(x)) {
+    // isalnum() bifurcates here.
+    clang_analyzer_eval(isalnum(x)); // expected-warning{{TRUE}} // expected-warning{{FALSE}}
+    clang_analyzer_eval(isprint(x)); // expected-warning{{TRUE}} // expected-warning{{FALSE}}
+  }
+}
+
+int isspace(int);
+void test_isspace(int x) {
+  if (!isascii(x))
+    return;
+  char y = x;
+  if (y == ' ')
+    clang_analyzer_eval(isspace(x)); // expected-warning{{TRUE}}
+}
+
+int isxdigit(int);
+void test_isxdigit(int x) {
+  if (isxdigit(x) && isupper(x)) {
+    clang_analyzer_eval(x >= 'A'); // expected-warning{{TRUE}}
+    clang_analyzer_eval(x <= 'F'); // expected-warning{{TRUE}}
+  }
+}
+
+void test_call_by_pointer() {
+  typedef int (*func)(int);
+  func f = isascii;
+  clang_analyzer_eval(f('A')); // expected-warning{{TRUE}}
+  f = ispunct;
+  clang_analyzer_eval(f('A')); // expected-warning{{FALSE}}
+}
Index: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
===================================================================
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
@@ -0,0 +1,836 @@
+//=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This checker improves modeling of a few simple library functions.
+// It does not throw warnings.
+//
+// This checker provides a specification format - `FunctionSpecTy' - and
+// contains descriptions of some library functions in this format. Each
+// specification contains a list of branches for splitting the program state
+// upon call, and range constraints on argument and return-value symbols that
+// are satisfied on each branch. This spec can be expanded to include more
+// items, like external effects of the function.
+//
+// The main difference between this approach and the body farms technique is
+// in more explicit control over how many branches are produced. For example,
+// consider standard C function `ispunct(int x)', which returns a non-zero value
+// iff `x' is a punctuation character, that is, when `x' is in range
+//   ['!', '/']  U  [':', '@']  U  ['[', '\`']  U  ['{', '~'].
+// `FunctionSpecTy' provides only two branches for this function. However, any
+// attempt to describe this range with if-statements in the body farm
+// would result in many more branches. Because each branch needs to be analyzed
+// independently, this significantly reduces performance. Additionally,
+// once we consider a branch on which `x' is in range, say, ['!', '/'],
+// we assume that such branch is an important separate path through the program,
+// which may lead to false positives because considering this particular path
+// was not consciously intended, and therefore it might have been unreachable.
+//
+// This checker uses eval::Call for modeling "pure" functions, for which
+// their `FunctionSpecTy' is a precise model. This avoids unnecessary
+// invalidation passes. Conflicts with other checkers are unlikely because
+// if the function has no other effects, other checkers would probably never
+// want to improve upon the modeling done by this checker.
+//
+// Non-"pure" functions, for which only partial improvement over the default
+// behavior is expected, are modeled via check::PostCall, non-intrusively.
+//
+// The following standard C functions are currently supported:
+//
+//   fgetc      getline   isdigit   isupper
+//   fread      isalnum   isgraph   isxdigit
+//   fwrite     isalpha   islower   read
+//   getc       isascii   isprint   write
+//   getchar    isblank   ispunct
+//   getdelim   iscntrl   isspace
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangSACheckers.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace clang::ento;
+
+namespace {
+class StdLibraryFunctionsChecker : public Checker<check::PostCall, eval::Call> {
+  /// Below is a series of typedefs necessary to define function specs.
+  /// We avoid nesting types here because each additional qualifier
+  /// would need to be repeated in every function spec.
+  struct FunctionSpecTy;
+
+  /// Specify how much the analyzer engine should entrust modeling this function
+  /// to us. If it doesn't, it performs additional invalidations.
+  enum InvalidationKindTy { NoEvalCall, EvalCallAsPure };
+
+  /// A pair of ValueRangeKindTy and IntRangeVectorTy would describe a range
+  /// imposed on a particular argument or return value symbol.
+  ///
+  /// Given a range, should the argument stay inside or outside this range?
+  /// The special `ComparesToArgument' value indicates that we should
+  /// impose a constraint that involves other argument or return value symbols.
+  enum ValueRangeKindTy { OutOfRange, WithinRange, ComparesToArgument };
+
+  /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
+  /// a non-negative integer, which is less than 5 and not equal to 2. For
+  /// `ComparesToArgument', holds information about how exactly to compare to
+  /// the argument.
+  typedef std::vector<std::pair<int64_t, int64_t>> IntRangeVectorTy;
+
+  /// A reference to an argument or return value by its number.
+  /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
+  /// obviously uint32_t should be enough for all practical purposes.
+  typedef uint32_t ArgNoTy;
+  static const ArgNoTy Ret = std::numeric_limits<ArgNoTy>::max();
+
+  /// Encapsulates a single range on a single symbol within a branch.
+  class ValueRange {
+    ArgNoTy ArgNo; // Argument to which we apply the range.
+    ValueRangeKindTy Kind; // Kind of range definition.
+    IntRangeVectorTy Args; // Polymorphic arguments.
+
+  public:
+    ValueRange(ArgNoTy ArgNo, ValueRangeKindTy Kind,
+               const IntRangeVectorTy &Args)
+        : ArgNo(ArgNo), Kind(Kind), Args(Args) {}
+
+    ArgNoTy getArgNo() const { return ArgNo; }
+    ValueRangeKindTy getKind() const { return Kind; }
+
+    BinaryOperator::Opcode getOpcode() const {
+      assert(Kind == ComparesToArgument);
+      assert(Args.size() == 1);
+      BinaryOperator::Opcode Op =
+          static_cast<BinaryOperator::Opcode>(Args[0].first);
+      assert(BinaryOperator::isComparisonOp(Op) &&
+             "Only comparison ops are supported for ComparesToArgument");
+      return Op;
+    }
+
+    ArgNoTy getOtherArgNo() const {
+      assert(Kind == ComparesToArgument);
+      assert(Args.size() == 1);
+      return static_cast<ArgNoTy>(Args[0].second);
+    }
+
+    const IntRangeVectorTy &getRanges() const {
+      assert(Kind != ComparesToArgument);
+      return Args;
+    }
+
+    // We avoid creating a virtual apply() method because
+    // it makes initializer lists harder to write.
+  private:
+    ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
+                                      const CallEvent &Call,
+                                      const FunctionSpecTy &Spec) const;
+    ProgramStateRef applyAsWithinRange(ProgramStateRef State,
+                                       const CallEvent &Call,
+                                       const FunctionSpecTy &Spec) const;
+    ProgramStateRef applyAsComparesToArgument(ProgramStateRef State,
+                                              const CallEvent &Call,
+                                              const FunctionSpecTy &Spec) const;
+
+  public:
+    ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
+                          const FunctionSpecTy &Spec) const {
+      switch (Kind) {
+      case OutOfRange:
+        return applyAsOutOfRange(State, Call, Spec);
+      case WithinRange:
+        return applyAsWithinRange(State, Call, Spec);
+      case ComparesToArgument:
+        return applyAsComparesToArgument(State, Call, Spec);
+      }
+      llvm_unreachable("Unknown ValueRange kind!");
+    }
+  };
+
+  /// The complete list of ranges that defines a single branch.
+  typedef std::vector<ValueRange> ValueRangeSet;
+
+  /// Includes information about function prototype (which is necessary to
+  /// ensure we're modeling the right function and casting values properly),
+  /// approach to invalidation, and a list of branches - essentially, a list
+  /// of lists of ranges - essentially, a list of lists of lists of segments.
+  struct FunctionSpecTy {
+    const std::vector<QualType> ArgTypes;
+    const QualType RetType;
+    const InvalidationKindTy InvalidationKind;
+    const std::vector<ValueRangeSet> Ranges;
+
+  private:
+    static void assertTypeSuitableForSpec(QualType T) {
+      assert(!T->isVoidType() &&
+             "We should have had no significant void types in the spec");
+      assert(T.isCanonical() &&
+             "We should only have canonical types in the spec");
+      // FIXME: lift this assert (but not the ones above!)
+      assert(T->isIntegralOrEnumerationType() &&
+             "We only support integral ranges in the spec");
+    }
+
+  public:
+    QualType getArgType(ArgNoTy ArgNo) const {
+      QualType T = (ArgNo == Ret) ? RetType : ArgTypes[ArgNo];
+      assertTypeSuitableForSpec(T);
+      return T;
+    }
+
+    /// Try our best to figure out if the call expression is the call of
+    /// *the* library function to which this specification applies.
+    bool matchesCall(const CallExpr *CE) const;
+  };
+
+  // The map of all functions supported by the checker. It is initialized
+  // lazily, and it doesn't change after initialization.
+  typedef llvm::StringMap<FunctionSpecTy> FunctionSpecMapTy;
+  mutable FunctionSpecMapTy FunctionSpecMap;
+
+  // Auxiliary functions to support ArgNoTy within all structures
+  // in a unified manner.
+  static QualType getArgType(const FunctionSpecTy &Spec, ArgNoTy ArgNo) {
+    return Spec.getArgType(ArgNo);
+  }
+  static QualType getArgType(const CallEvent &Call, ArgNoTy ArgNo) {
+    return ArgNo == Ret ? Call.getResultType().getCanonicalType()
+                        : Call.getArgExpr(ArgNo)->getType().getCanonicalType();
+  }
+  static QualType getArgType(const CallExpr *CE, ArgNoTy ArgNo) {
+    return ArgNo == Ret ? CE->getType().getCanonicalType()
+                        : CE->getArg(ArgNo)->getType().getCanonicalType();
+  }
+  static SVal getArgSVal(const CallEvent &Call, ArgNoTy ArgNo) {
+    return ArgNo == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgNo);
+  }
+
+public:
+  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
+
+private:
+  Optional<FunctionSpecTy> findFunctionSpec(const FunctionDecl *FD,
+                                          const CallExpr *CE,
+                                          CheckerContext &C) const;
+
+  void initFunctionSpecs(BasicValueFactory &BVF) const;
+};
+} // end of anonymous namespace
+
+ProgramStateRef
+StdLibraryFunctionsChecker::ValueRange::applyAsOutOfRange(
+    ProgramStateRef State, const CallEvent &Call,
+    const FunctionSpecTy &Spec) const {
+  // Exclude every listed segment from the symbol's possible values, in turn.
+  ProgramStateManager &Mgr = State->getStateManager();
+  SValBuilder &SVB = Mgr.getSValBuilder();
+  BasicValueFactory &BVF = SVB.getBasicValueFactory();
+  ConstraintManager &CM = Mgr.getConstraintManager();
+  QualType T = getArgType(Spec, getArgNo());
+  SVal V = getArgSVal(Call, getArgNo());
+
+  auto N = V.getAs<NonLoc>();
+  if (!N)
+    return State; // Values that are not NonLoc are left unconstrained.
+
+  for (const std::pair<int64_t, int64_t> &Segment : getRanges()) {
+    const llvm::APSInt &Min = BVF.getValue(Segment.first, T);
+    const llvm::APSInt &Max = BVF.getValue(Segment.second, T);
+    assert(Min <= Max);
+    State = CM.assumeWithinInclusiveRange(State, *N, Min, Max, false);
+    if (!State)
+      break; // No state left; there is nothing more to constrain.
+  }
+
+  return State;
+}
+
+ProgramStateRef
+StdLibraryFunctionsChecker::ValueRange::applyAsWithinRange(
+    ProgramStateRef State, const CallEvent &Call,
+    const FunctionSpecTy &Spec) const {
+  // Returns State narrowed to the spec's segments, or null if that fails.
+  ProgramStateManager &Mgr = State->getStateManager();
+  SValBuilder &SVB = Mgr.getSValBuilder();
+  BasicValueFactory &BVF = SVB.getBasicValueFactory();
+  ConstraintManager &CM = Mgr.getConstraintManager();
+  QualType T = getArgType(Spec, getArgNo());
+  SVal V = getArgSVal(Call, getArgNo());
+
+  // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
+  // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
+  // and then cut away all holes in R one by one.
+  if (auto N = V.getAs<NonLoc>()) {
+    const IntRangeVectorTy &R = getRanges();
+    assert(!R.empty() && "WithinRange needs at least one segment");
+    const llvm::APSInt &MinusInf = BVF.getMinValue(T);
+    const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
+    // Step past the outer bounds in uint64_t: a bound such as INT64_MAX would
+    // overflow int64_t (undefined behavior), whereas unsigned math wraps.
+    const uint64_t BelowMin = static_cast<uint64_t>(R.front().first) - 1;
+    const uint64_t AboveMax = static_cast<uint64_t>(R.back().second) + 1;
+    const llvm::APSInt &Left = BVF.getValue(BelowMin, T);
+    if (Left != PlusInf) {
+      assert(MinusInf <= Left);
+      State = CM.assumeWithinInclusiveRange(State, *N, MinusInf, Left, false);
+      if (!State)
+        return nullptr;
+    }
+    const llvm::APSInt &Right = BVF.getValue(AboveMax, T);
+    if (Right != MinusInf) {
+      assert(Right <= PlusInf);
+      State = CM.assumeWithinInclusiveRange(State, *N, Right, PlusInf, false);
+      if (!State)
+        return nullptr;
+    }
+    for (size_t I = 1, E = R.size(); I != E; ++I) {
+      const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1, T);
+      const llvm::APSInt &Max = BVF.getValue(R[I].first - 1, T);
+      assert(Min <= Max);
+      State = CM.assumeWithinInclusiveRange(State, *N, Min, Max, false);
+      if (!State)
+        return nullptr;
+    }
+  }
+
+  return State;
+}
+
+ProgramStateRef
+StdLibraryFunctionsChecker::ValueRange::applyAsComparesToArgument(
+    ProgramStateRef State, const CallEvent &Call,
+    const FunctionSpecTy &Spec) const {
+  // Assume that `V Op OtherV' holds, where OtherV is another call value.
+  ProgramStateManager &Mgr = State->getStateManager();
+  SValBuilder &SVB = Mgr.getSValBuilder();
+  QualType CondT = SVB.getConditionType();
+  QualType T = getArgType(Spec, getArgNo());
+  SVal V = getArgSVal(Call, getArgNo());
+  // The opcode and the other value's index both come from this range entry.
+  BinaryOperator::Opcode Op = getOpcode();
+  ArgNoTy OtherArg = getOtherArgNo();
+  SVal OtherV = getArgSVal(Call, OtherArg);
+  QualType OtherT = getArgType(Call, OtherArg);
+  // Cast the other value to this value's spec type; no integral promotion.
+  OtherV = SVB.evalCast(OtherV, T, OtherT);
+  if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
+                       .getAs<DefinedOrUnknownSVal>())
+    State = State->assume(*CompV, true);
+  return State;
+}
+
+void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
+                                               CheckerContext &C) const {
+  // We need both the callee declaration and the originating call expression.
+  const auto *Callee = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
+  if (!Callee)
+    return;
+  const auto *Origin = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
+  if (!Origin)
+    return;
+
+  // Bail out unless the call matches one of the known specifications.
+  Optional<FunctionSpecTy> MaybeSpec = findFunctionSpec(Callee, Origin, C);
+  if (!MaybeSpec)
+    return;
+  const FunctionSpecTy &Spec = *MaybeSpec;
+
+  // Every range set describes one branch: start from the current state,
+  // apply its constraints in order, and give up on the branch as soon as
+  // applying one of them leaves no state.
+  ProgramStateRef Before = C.getState();
+  for (const ValueRangeSet &Branch : Spec.Ranges) {
+    ProgramStateRef After = Before;
+    for (auto It = Branch.begin(); After && It != Branch.end(); ++It)
+      After = It->apply(After, Call, Spec);
+
+    // Transition only when the branch survived and actually changed the state.
+    if (After && After != Before)
+      C.addTransition(After);
+  }
+}
+
+bool StdLibraryFunctionsChecker::evalCall(const CallExpr *CE,
+                                          CheckerContext &C) const {
+  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CE->getCalleeDecl());
+  if (!FD)
+    return false;
+  // Only calls that match a known function spec are candidates for modeling.
+  Optional<FunctionSpecTy> FoundSpec = findFunctionSpec(FD, CE, C);
+  if (!FoundSpec)
+    return false;
+  // The spec's invalidation kind decides whether we evaluate the call here.
+  const FunctionSpecTy &Spec = *FoundSpec;
+  switch (Spec.InvalidationKind) {
+  case EvalCallAsPure: {
+    ProgramStateRef State = C.getState();
+    const LocationContext *LC = C.getLocationContext();
+    SVal V = C.getSValBuilder().conjureSymbolVal(
+        CE, LC, CE->getType().getCanonicalType(), C.blockCount());
+    State = State->BindExpr(CE, LC, V);
+    C.addTransition(State);
+    return true;
+  }
+  case NoEvalCall:
+    // Spec tells us to avoid performing eval::Call. The function is possibly
+    // evaluated by another checker, or evaluated conservatively.
+    return false;
+  }
+  llvm_unreachable("Unknown invalidation kind!");
+}
+
+bool StdLibraryFunctionsChecker::FunctionSpecTy::matchesCall(
+    const CallExpr *CE) const {
+  // The arity has to agree before anything else is compared.
+  const size_t NumArgs = ArgTypes.size();
+  if (CE->getNumArgs() != NumArgs)
+    return false;
+
+  // A null return type in the spec is not compared against the call.
+  if (!RetType.isNull()) {
+    if (CE->getType().getCanonicalType() != RetType)
+      return false;
+  }
+
+  // Compare each argument whose type the spec cares about.
+  for (size_t ArgIdx = 0; ArgIdx != NumArgs; ++ArgIdx) {
+    const QualType Expected = ArgTypes[ArgIdx];
+    if (!Expected.isNull()) { // Null type marks irrelevant arguments.
+      assertTypeSuitableForSpec(Expected);
+      QualType Seen = StdLibraryFunctionsChecker::getArgType(CE, ArgIdx);
+      assert(Seen.isCanonical());
+      if (Seen != Expected)
+        return false;
+    }
+  }
+
+  return true;
+}
+
+Optional<StdLibraryFunctionsChecker::FunctionSpecTy>
+StdLibraryFunctionsChecker::findFunctionSpec(const FunctionDecl *FD,
+                                             const CallExpr *CE,
+                                             CheckerContext &C) const {
+  // Note: we cannot always obtain FD from CE
+  // (eg. virtual call, or call by pointer).
+  assert(CE);
+
+  if (!FD)
+    return None;
+  // Populate the spec map on first use; later calls return immediately.
+  SValBuilder &SVB = C.getSValBuilder();
+  BasicValueFactory &BVF = SVB.getBasicValueFactory();
+  initFunctionSpecs(BVF);
+  // Look the function up by its qualified name.
+  std::string Name = FD->getQualifiedNameAsString();
+  if (Name.empty() || !C.isCLibraryFunction(FD, Name))
+    return None;
+
+  auto FSMI = FunctionSpecMap.find(Name);
+  if (FSMI == FunctionSpecMap.end())
+    return None;
+
+  // Verify that function signature matches the spec in advance.
+  // Otherwise we might be modeling the wrong function.
+  // Strict checking is important because we will be conducting
+  // very integral-type-sensitive operations on arguments and
+  // return values.
+  const FunctionSpecTy &Spec = FSMI->second;
+  if (!Spec.matchesCall(CE))
+    return None;
+  // Note: this returns a copy of the map's entry, wrapped in an Optional.
+  return Spec;
+}
+
+void StdLibraryFunctionsChecker::initFunctionSpecs(
+    BasicValueFactory &BVF) const {
+  if (!FunctionSpecMap.empty())
+    return;
+
+  ASTContext &ACtx = BVF.getContext();
+
+  QualType Irrelevant;
+  QualType IntTy = ACtx.IntTy;
+  QualType SizeTy = ACtx.getSizeType();
+  QualType SSizeTy = ACtx.getIntTypeForBitwidth(ACtx.getTypeSize(SizeTy), true);
+
+  // Don't worry about truncation here, it'd be cast back to SIZE_MAX when used.
+  LLVM_ATTRIBUTE_UNUSED int64_t SizeMax =
+      BVF.getMaxValue(SizeTy).getLimitedValue();
+  int64_t SSizeMax =
+    BVF.getMaxValue(SSizeTy).getLimitedValue();
+
+  // We are finally ready to define specifications for all supported functions.
+  //
+  // The signature needs to have the correct number of arguments.
+  // However, we insert `Irrelevant' when the type is insignificant.
+  //
+  // Argument ranges should always cover all variants. If return value
+  // is completely unknown, omit it from the respective range set.
+  //
+  // All types in the spec need to be canonical.
+  //
+  // Every item in the list of range sets represents a particular
+  // execution path the analyzer would need to explore once
+  // the call is modeled - a new program state is constructed
+  // for every range set, and each range line in the range set
+  // corresponds to a specific constraint within this state.
+  //
+  // Upon comparing to another argument, the other argument is cast
+  // to the current argument's type. This avoids proper promotion but
+  // seems useful. For example, read() receives size_t argument,
+  // and its return value, which is of type ssize_t, cannot be greater
+  // than this argument. If we made a promotion, and the size argument
+  // is equal to, say, 10, then we'd impose a range of [0, 10] on the
+  // return value, however the correct range is [-1, 10].
+  //
+  // Please update the list of functions in the header after editing!
+  //
+  // The format is as follows:
+  //
+  //{ "function name",
+  //  { spec:
+  //    { argument types list, ... },
+  //    return type, purity, { range set list:
+  //      { range list:
+  //        { argument index, within or out of, {{from, to}, ...} },
+  //        { argument index, compares to argument, {{how, which}} },
+  //        ...
+  //      }
+  //    }
+  //  }
+  //}
+
+  FunctionSpecMap = {
+    // The isascii() family of functions.
+    { "isalnum",
+      {
+        /*ArgTypes=*/ { IntTy },
+        /*RetType=*/ IntTy, /*InvalidationKind=*/ EvalCallAsPure, /*Ranges=*/ {
+          { // Boils down to isupper() or islower() or isdigit()
+            { /*ArgNo=*/ 0U, /*Kind=*/ WithinRange, /*Args=*/{{'0', '9'},
+                                                              {'A', 'Z'},
+                                                              {'a', 'z'}} },
+            { /*Etc.*/Ret, OutOfRange, {{0, 0}} }
+          },
+          { // The locale-specific range.
+            { 0U, WithinRange, {{128, 255}} }
+          },
+          { // Other.
+            { 0U, OutOfRange, {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, 255}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "isalpha",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          { // isupper() or islower(). Note that 'Z' is less than 'a'.
+            { 0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          { // The locale-specific range.
+            { 0U, WithinRange, {{128, 255}} },
+          },
+          { // Other.
+            { 0U, OutOfRange, {{'A', 'Z'}, {'a', 'z'}, {128, 255}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "isascii",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          { // Is ASCII.
+            { 0U, WithinRange, {{0, 127}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          { // Is not ASCII.
+            { 0U, OutOfRange, {{0, 127}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "isblank",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          { // Is tab or space.
+            { 0U, WithinRange, {{'\t', '\t'}, {' ', ' '}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          { // Other.
+            { 0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "iscntrl",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          { // Is a control character: 0..31 or 127. Space (32) is not one.
+            { 0U, WithinRange, {{0, 31}, {127, 127}} },
+            { Ret, OutOfRange, {{0, 0}} },
+          },
+          { // Other.
+            { 0U, OutOfRange, {{0, 31}, {127, 127}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "isdigit",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          { // Is a digit.
+            { 0U, WithinRange, {{'0', '9'}} },
+            { Ret, OutOfRange, {{0, 0}} },
+          },
+          {
+            { 0U, OutOfRange, {{'0', '9'}} },
+            { Ret, WithinRange, {{0, 0}} },
+          }
+        }
+      }
+    },
+    { "isgraph",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          {
+            { 0U, WithinRange, {{33, 126}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          {
+            { 0U, OutOfRange, {{33, 126}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "islower",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          { // Is certainly lowercase.
+            { 0U, WithinRange, {{'a', 'z'}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          { // Is ascii but not lowercase.
+            { 0U, WithinRange, {{0, 127}} },
+            { 0U, OutOfRange, {{'a', 'z'}} },
+            { Ret, WithinRange, {{0, 0}} }
+          },
+          { // The locale-specific range.
+            { 0U, WithinRange, {{128, 255}} }
+          },
+          { // Is not an unsigned char.
+            { 0U, OutOfRange, {{0, 255}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "isprint",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          {
+            { 0U, WithinRange, {{32, 126}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          {
+            { 0U, OutOfRange, {{32, 126}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "ispunct",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          {
+            { 0U, WithinRange, {{'!', '/'}, {':', '@'},
+                                {'[', '`'}, {'{', '~'}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          {
+            { 0U, OutOfRange, {{'!', '/'}, {':', '@'},
+                               {'[', '`'}, {'{', '~'}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "isspace",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          { // Space, '\f', '\n', '\r', '\t', '\v'.
+            { 0U, WithinRange, {{9, 13}, {' ', ' '}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          { // The locale-specific range.
+            { 0U, WithinRange, {{128, 255}} }
+          },
+          {
+            { 0U, OutOfRange, {{9, 13}, {' ', ' '}, {128, 255}} },
+            { Ret, WithinRange, {{0, 0}} }
+          },
+        }
+      }
+    },
+    { "isupper",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          { // Is certainly uppercase.
+            { 0U, WithinRange, {{'A', 'Z'}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          { // The locale-specific range.
+            { 0U, WithinRange, {{128, 255}} }
+          },
+          { // Other.
+            { 0U, OutOfRange, {{'A', 'Z'}, {128, 255}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+    { "isxdigit",
+      {
+        { IntTy },
+        IntTy, EvalCallAsPure, {
+          {
+            { 0U, WithinRange, {{'0', '9'}, {'A', 'F'}, {'a', 'f'}} },
+            { Ret, OutOfRange, {{0, 0}} }
+          },
+          {
+            { 0U, OutOfRange, {{'0', '9'}, {'A', 'F'}, {'a', 'f'}} },
+            { Ret, WithinRange, {{0, 0}} }
+          }
+        }
+      }
+    },
+
+    // The getc() family of functions that returns either a char or an EOF.
+    { "getc",
+      {
+        { Irrelevant },
+        IntTy, NoEvalCall, {
+          { // FIXME: EOF is assumed to be defined as -1.
+            { Ret, WithinRange, {{-1, 255}} }
+          }
+        }
+      }
+    },
+    { "fgetc",
+      {
+        { Irrelevant },
+        IntTy, NoEvalCall, {
+          { // FIXME: EOF is assumed to be defined as -1.
+            { Ret, WithinRange, {{-1, 255}} }
+          }
+        }
+      }
+    },
+    { "getchar",
+      {
+        { },
+        IntTy, NoEvalCall, {
+          { // FIXME: EOF is assumed to be defined as -1.
+            { Ret, WithinRange, {{-1, 255}} }
+          }
+        }
+      }
+    },
+
+    // read()-like functions that never return more than buffer size.
+    { "read",
+      {
+        { Irrelevant, Irrelevant, SizeTy },
+        SSizeTy, NoEvalCall, {
+          {
+            { Ret, ComparesToArgument, {{BO_LE, 2U}} },
+            { Ret, WithinRange, {{-1, SSizeMax}} }
+          },
+        }
+      }
+    },
+    { "write",
+      {
+        { Irrelevant, Irrelevant, SizeTy },
+        SSizeTy, NoEvalCall, {
+          {
+            { Ret, ComparesToArgument, {{BO_LE, 2U}} },
+            { Ret, WithinRange, {{-1, SSizeMax}} }
+          },
+        }
+      }
+    },
+    { "fread",
+      {
+        { Irrelevant, Irrelevant, SizeTy, Irrelevant },
+        SizeTy, NoEvalCall, {
+          {
+            { Ret, ComparesToArgument, {{BO_LE, 2U}} }
+          }
+        }
+      }
+    },
+    { "fwrite",
+      {
+        { Irrelevant, Irrelevant, SizeTy, Irrelevant },
+        SizeTy, NoEvalCall, {
+          {
+            { Ret, ComparesToArgument, {{BO_LE, 2U}} }
+          }
+        }
+      }
+    },
+
+    // getline()-like functions either fail or read at least the delimiter.
+    { "getline",
+      {
+        { Irrelevant, Irrelevant, Irrelevant },
+        SSizeTy, NoEvalCall, {
+          {
+            { Ret, WithinRange, {{-1, -1}, {1, SSizeMax}} }
+          },
+        }
+      }
+    },
+    { "getdelim",
+      {
+        { Irrelevant, Irrelevant, Irrelevant, Irrelevant },
+        SSizeTy, NoEvalCall, {
+          {
+            { Ret, WithinRange, {{-1, -1}, {1, SSizeMax}} }
+          },
+        }
+      }
+    }
+  };
+}
+
+void ento::registerStdLibraryFunctionsChecker(CheckerManager &mgr) {
+  mgr.registerChecker<StdLibraryFunctionsChecker>();
+}
Index: lib/StaticAnalyzer/Checkers/CMakeLists.txt
===================================================================
--- lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -66,6 +66,7 @@
   ReturnUndefChecker.cpp
   SimpleStreamChecker.cpp
   StackAddrEscapeChecker.cpp
+  StdLibraryFunctionsChecker.cpp
   StreamChecker.cpp
   TaintTesterChecker.cpp
   TestAfterDivZeroChecker.cpp
Index: include/clang/StaticAnalyzer/Checkers/Checkers.td
===================================================================
--- include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -386,6 +386,10 @@
   HelpText<"Check for proper usage of vfork">,
   DescFile<"VforkChecker.cpp">;
 
+def StdLibraryFunctionsChecker : Checker<"StdLibraryFunctions">,
+  HelpText<"Improve modeling of standard library functions">,
+  DescFile<"StdLibraryFunctionsChecker.cpp">;
+
 } // end "unix"
 
 let ParentPackage = UnixAlpha in {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to