ASDenysPetrov updated this revision to Diff 357463.
ASDenysPetrov added a comment.

Added more descriptive comments. Fixed 
`RangeConstraintManager::updateExistingConstraints` function.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103096/new/

https://reviews.llvm.org/D103096

Files:
  clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
  clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
  clang/test/Analysis/symbol-integral-cast.cpp

Index: clang/test/Analysis/symbol-integral-cast.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/symbol-integral-cast.cpp
@@ -0,0 +1,353 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -analyzer-config eagerly-assume=false -analyzer-config support-symbolic-integer-casts=true -verify %s
+
+template <typename T>
+void clang_analyzer_eval(T);
+void clang_analyzer_warnIfReached();
+
+typedef short int16_t;
+typedef int int32_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+
+void test1(int x) {
+  // Even if two lower bytes of `x` equal to zero, it doesn't mean that
+  // the entire `x` is zero. We are not able to know the exact value of x.
+  // It can be one of  65536 possible values like [0, 65536, 131072, ...]
+  // and so on. To avoid huge range sets we still assume `x` in the range
+  // [INT_MIN, INT_MAX].
+  if (!(short)x) {
+    if (!x)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test2(int x) {
+  // If the two lower bytes of `x` are zero, then `x` can't be 65537, because
+  // 65537 is not truncated to short as zero. Thus the branch is infeasible.
+  short s = x;
+  if (!s) {
+    if (x == 65537)
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test3(int x, short s) {
+  s = x;
+  if ((short)x > -10 && s < 10) {
+    if (x > 0 && x < 10) {
+      // If the range of the whole variable was constrained then reason again
+      // about truncated bytes to make the ranges more precise.
+      clang_analyzer_eval((short)x <= 0); // expected-warning {{FALSE}}
+    }
+  }
+}
+
+void test4(unsigned x) {
+  if ((char)x > 8) {
+    // Constrain the range of the lowest byte of `x` to [9, CHAR_MAX].
+    // The original range of `x` still remains [0, UINT_MAX].
+    clang_analyzer_eval((char)x < 42); // expected-warning {{UNKNOWN}}
+    if (x < 42) {
+      // Constrain the original range to [0, 41] and update (re-constrain)
+      // the range of the lowest byte of 'x' to [9, 41].
+      clang_analyzer_eval((char)x < 42); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test5(unsigned x) {
+  if ((char)x > -10 && (char)x < 10) {
+    if ((short)x == 8) {
+      // If the range of higher bytes(short) was constrained then reason again
+      // about smaller truncated ranges(char) to make it more precise.
+      clang_analyzer_eval((char)x == 8);  // expected-warning {{TRUE}}
+      clang_analyzer_eval((short)x == 8); // expected-warning {{TRUE}}
+      // We still assume full version of `x` in the range [0, UINT_MAX].
+      clang_analyzer_eval(x == 8); // expected-warning {{UNKNOWN}}
+    }
+  }
+}
+
+void test6(int x) {
+  // Even if the two lower bytes of `x` are less than zero, it doesn't mean
+  // that `x` can't be greater than zero. Hence we don't change the native
+  // range of `x` and this branch is feasible.
+  if (x > 0)
+    if ((short)x < 0)
+      clang_analyzer_eval(x > 0); // expected-warning {{TRUE}}
+}
+
+void test7(int x) {
+  // The range of two lower bytes of `x` [1, SHORT_MAX] is enough to cover
+  // all possible values of char [CHAR_MIN, CHAR_MAX]. So the lowest byte
+  // can be lower than zero.
+  if ((short)x > 0) {
+    if ((char)x < 0)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test8(int x) {
+  // Promotion from `signed int` to `signed long long` also reasons about the
+  // original range, because we know the fact that even after promotion it
+  // remains in the range [INT_MIN, INT_MAX].
+  if ((long long)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{TRUE}}
+}
+
+void test9(signed int x) {
+  // Any cast `signed` to `unsigned` produces an unsigned range, which is
+  // [0, UNSIGNED_MAX] and can not be lower than zero.
+  if ((unsigned long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned int)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned short)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned char)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+}
+
+void test10(unsigned int x, signed char sc) {
+  // Promotion from `unsigned` to `signed` produces a signed range,
+  // which is able to cover all the values of the original,
+  // so that such cast is not lower than zero.
+  if ((signed long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  // Any other cast(conversion or truncation) from `unsigned` to `signed`
+  // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]
+  // and can be lower than zero.
+  if ((signed int)x < 0)            // explicit cast
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  signed short ss = x; // initialization
+  if (ss < 0)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  sc = x; // assignment
+  if (sc < 0)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+}
+
+void test11(unsigned int x) {
+  // Promotion from 'unsigned' to 'signed' entirely covers the original range.
+  // Hence such a cast is not lower than zero and the `true` branch is
+  // infeasible. But it doesn't affect the original range, which still remains
+  // as [0, UNSIGNED_MAX].
+  if ((signed long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  // Any other cast(conversion or truncation) from `unsigned` to `signed`
+  // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]. But it doesn't
+  // affect the original range, which still remains as [0, UNSIGNED_MAX].
+  if ((signed int)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  if ((signed short)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  if ((signed char)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+}
+
+void test12(int x, char c) {
+  if (x >= 5308) {
+    if (x <= 5419) {
+      // Truncation on assignment: int[5308, 5419] -> char[-68, 43]
+      c = x;
+      clang_analyzer_eval(-68 <= c && c <= 43); // expected-warning {{TRUE}}
+
+      if (c < 50)
+        clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+      else
+        clang_analyzer_warnIfReached(); // no-warning
+
+      // Truncation on initialization: int[5308, 5419] -> char[-68, 43]
+      char c1 = x;
+      clang_analyzer_eval(-68 <= c1 && c1 <= 43); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test13(int x) {
+  if (x > 913440767 && x < 913440769) { // 0x36720000
+
+    if ((short)x)                     // Truncation: int[913440768] -> short[0]
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+    if ((short)x != 0)
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test14(int x) {
+  if (x >= -1569193983 && x <= 578290016) {
+    // The big range of `x` covers all possible values of short.
+    // Truncation: int[-1569193983, 578290016] -> short[-32768, 32767]
+    if ((short)x > 0) {
+      clang_analyzer_eval(-1569193983 <= x && x <= 578290016); // expected-warning {{TRUE}}
+      short s = x;
+      clang_analyzer_eval(-32768 <= s && s <= 32767); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test15(int x) {
+  if (x >= -1569193983 && x <= -1569193871) { // [0xA2780001, 0xA2780071]
+    // The small range of `x` covers only several values of short.
+    // Truncation: int[-1569193983, -1569193871] -> short[1, 113]
+    if ((short)x)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+
+    if ((short)x > 0)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+
+    if ((short)x < 114)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+  }
+}
+
+void test16(char x) {
+  if (x < 0)
+    clang_analyzer_eval(-128 <= x && x < 0); // expected-warning {{TRUE}}
+  else
+    clang_analyzer_eval(0 <= x && x <= 127); // expected-warning {{TRUE}}
+}
+
+void test17(char x) {
+  if (-11 <= x && x <= -10) {
+    unsigned u = x;
+    // Conversion: char[-11, -10] -> unsigned int[4294967285, 4294967286]
+    clang_analyzer_eval(4294967285 <= u && u <= 4294967286); // expected-warning {{TRUE}}
+    unsigned short us = x;
+    // Conversion: char[-11, -10] -> unsigned short[65525, 65526]
+    clang_analyzer_eval(65525 <= us && us <= 65526); // expected-warning {{TRUE}}
+    unsigned char uc = x;
+    // Conversion: char[-11, -10] -> unsigned char[245, 246]
+    clang_analyzer_eval(245 <= uc && uc <= 246); // expected-warning {{TRUE}}
+  }
+}
+
+void test18(char c, short s, int i) {
+  // Any char value is always less than 1000.
+  int OneThousand = 1000;
+  c = i;
+  if (c < OneThousand)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // no-warning
+
+  // Any short value is always greater than -40000.
+  int MinusFourtyThousands = -40000;
+  s = i;
+  if (s > MinusFourtyThousands)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // no-warning
+}
+
+void test19(char x, short y) {
+  if (-43 <= x && x <= -42) { // x[-43, -42]
+    y = 42;
+    clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+
+    clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+  }
+}
+
+void test20(char x, short y) {
+  if (42 <= y && y <= 43) { // y[42, 43]
+    x = -42;
+    clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+
+    clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+  }
+}
+
+void test21(unsigned x) {
+  if (x > 42) {
+    // Unsigned range can generate two signed ranges.
+    // Conversion: unsigned[43, 4294967295] -> int[-2147483648, -1]U[43, 2147483647]
+    int i = x;                             // initialization
+    clang_analyzer_eval(-1 < i && i < 43); // expected-warning {{FALSE}}
+  }
+}
+
+void test22(int x, unsigned u) {
+  if (x > -42) {
+    // Signed range can generate two unsigned ranges.
+    // Conversion: int[-41, 2147483647] -> unsigned[0, 2147483647]U[4294967255, 4294967295]
+    u = x;                                                 // assignment
+    clang_analyzer_eval(2147483647 < u && u < 4294967255); // expected-warning {{FALSE}}
+  }
+}
Index: clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -536,8 +536,11 @@
       // We only handle LHS as simple symbols or SymIntExprs.
       SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
 
+      // Unwrap SymbolCast trying to find SymIntExpr inside.
+      SymbolRef S = Sym->ignoreCasts();
+
       // LHS is a symbolic expression.
-      if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
+      if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(S)) {
 
         // Is this a logical not? (!x is represented as x == 0.)
         if (op == BO_EQ && rhs.isZeroConstant()) {
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -13,6 +13,7 @@
 
 #include "clang/Basic/JsonSupport.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
@@ -20,8 +21,8 @@
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -1127,6 +1128,60 @@
 //                           Symbolic reasoning logic
 //===----------------------------------------------------------------------===//
 
+/// This class is used for integral symbolic casts feature as a helper instance.
+///
+/// It represents a list of integral types of different sizes going in ascending
+/// order from 1 to 8 bytes. It aggregates several functions for convenience of
+/// usage. We can iterate through the types and find a type by size (bit width).
+///
+/// We use FOUR integer types: `int8`, `int16`, `int32`, `int64`, because we
+/// only support casts between types, which are lower or equal to 64-bit width.
+///
+/// We use these types for creating SymbolCast to find constraints in the
+/// constraint map. This allows to canonize a `key-value` to store and retrieve
+/// constraints instead of brute force.
+///
+/// We don't care about the type signedness. Signedness is just a way of bits
+/// representation. We just care about saving data. It's enough for us to store
+/// specific constraints for the type for a specific bit width. We never use
+/// retrieved constraint directly. We always use RangeSet::Factory::castTo to
+/// get ranges for a needed type (signed or unsigned) after retrieving.
+class NominalTypeList {
+  CanQualType Types[4];
+
+public:
+  using Iterator = CanQualType *;
+
+  void init(ASTContext &C) {
+    Types[0] = C.Char8Ty;
+    Types[1] = C.Char16Ty;
+    Types[2] = C.Char32Ty;
+    Types[3] = C.LongLongTy;
+  }
+  Iterator findByWidth(uint32_t Width) {
+    int index = 4;
+    switch (Width) {
+    case 8:
+      index = 0;
+      break;
+    case 16:
+      index = 1;
+      break;
+    case 32:
+      index = 2;
+      break;
+    case 64:
+      index = 3;
+    };
+    return Types + index;
+  }
+  Iterator begin() { return std::begin(Types); }
+  Iterator end() { return std::end(Types); }
+};
+
+// We should initialize NTL with `init` method before use.
+static NominalTypeList NTL;
+
 /// A little component aggregating all of the reasoning we have about
 /// the ranges of symbolic expressions.
 ///
@@ -1142,6 +1197,69 @@
     return Inferrer.infer(Origin);
   }
 
+  RangeSet VisitSymbolCast(const SymbolCast *Sym) {
+    AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions();
+    if (!Opts.ShouldSupportSymbolicIntegerCasts)
+      return VisitSymExpr(Sym);
+
+    // Unwrap symbol to get an underlying(root) symbol.
+    // Store every next type except the inner(original) one.
+    SmallVector<QualType, 2> Types;
+    uint32_t MinBitWidth = UINT32_MAX;
+    SymbolRef RootSym = Sym;
+    ASTContext &C = ValueFactory.getContext();
+    do {
+      // We only handle integral cast, when all the types are integrals.
+      // Otherwise, pass the given symbol to VisitSymExpr.
+      QualType T = RootSym->getType();
+      if (!T->isIntegralOrEnumerationType())
+        return VisitSymExpr(Sym);
+
+      MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T));
+      Types.push_back(T);
+      RootSym = cast<SymbolCast>(RootSym)->getOperand();
+    } while (isa<SymbolCast>(RootSym));
+
+    QualType RootTy = RootSym->getType();
+    const uint32_t RootBitWidth = C.getIntWidth(RootTy);
+
+    // Check if we have any known truncated ranges of the root symbol.
+    // Truncated ranges usually are more precise than the original one.
+    // The more truncated the range is, the more precise it should be.
+    // Example: Consider the given SymbolCast is (int8)(int64)(int16){int32 x}.
+    // `int8` is the smallest type, so the range will fit in it.
+    // Traverse through NTL types that are smaller than the root type:
+    // [int8, int32).
+    const RangeSet *RSPtr = nullptr;
+    auto It = NTL.findByWidth(MinBitWidth);
+    auto E = NTL.findByWidth(RootBitWidth);
+    for (; !RSPtr && It < E; ++It) {
+      // Produce canonical symbols with the nominal type.
+      SymbolRef S =
+          State->getSymbolManager().getCastSymbol(RootSym, RootTy, *It);
+      // Find the first constraint and exit the loop.
+      RSPtr = getConstraint(State, S);
+    }
+    // If we didn't find any truncated ranges, look for the constraint for
+    // the root type.
+    // Example (cont.): Use the root symbol `{int32 x}`.
+    if (!RSPtr)
+      RSPtr = getConstraint(State, RootSym);
+    // If there's no existing range, create it based on the root type.
+    // Example (cont.): Make range based on `int32`.
+    RangeSet RS = RSPtr ? *RSPtr : infer(RootTy);
+
+    // Cast the range to the cast types from inner to outer one by one.
+    // Example (cont.): Go through 3 types from `int16` to `int8`.
+    auto TypesReversedRange = llvm::make_range(Types.rbegin(), Types.rend());
+    for (const QualType T : TypesReversedRange)
+      RS = RangeFactory.castTo(RS, T);
+
+    // Finally we got a range of Sym->getType() type.
+    // Example (cont.): Type of range is `int8`.
+    return RS;
+  }
+
   RangeSet VisitSymExpr(SymbolRef Sym) {
     // If we got to this function, the actual type of the symbolic
     // expression is not supported for advanced inference.
@@ -1689,7 +1807,9 @@
 class RangeConstraintManager : public RangedConstraintManager {
 public:
   RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
-      : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
+      : RangedConstraintManager(EE, SVB), F(getBasicVals()) {
+    NTL.init(SVB.getContext());
+  }
 
   //===------------------------------------------------------------------===//
   // Implementation for interface from ConstraintManager.
@@ -1756,6 +1876,13 @@
 private:
   RangeSet::Factory F;
 
+  std::tuple<ProgramStateRef, SymbolRef, RangeSet>
+  modifySymbolAndConstraints(ProgramStateRef State, SymbolRef Sym, RangeSet R);
+  ProgramStateRef updateExistingConstraints(ProgramStateRef State,
+                                            SymbolRef Sym, RangeSet R);
+  Optional<std::pair<SymbolRef, RangeSet>>
+  getProperSymbolAndConstraint(SymbolRef Sym, RangeSet R);
+
   RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
   RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
 
@@ -1795,6 +1922,8 @@
   ProgramStateRef track(RangeSet NewConstraint, ProgramStateRef State,
                         SymbolRef Sym, const llvm::APSInt &Int,
                         const llvm::APSInt &Adjustment) {
+    assert(State);
+
     if (NewConstraint.isEmpty())
       // This is an infeasible assumption.
       return nullptr;
@@ -2574,6 +2703,159 @@
 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
 // UINT_MAX, 0, 1, and 2.
 
+/// Prepare a proper symbol and ranges to save them into the constraint map.
+/// Update existing constraints related to the given symbol if it is an integral
+/// one.
+///
+/// \param State -- current program state.
+/// \param Sym -- a considered symbol.
+/// \param R -- a known range for the given symbol.
+/// \returns the triple which matches the parameters. Each argument is
+///  corrected (if needed) and returned back. State is null in case of an
+///  infeasible branch.
+///
+/// \note: this function is a helper and is only invoked in
+///  RangeConstraintManager::assume### methods.
+std::tuple<ProgramStateRef, SymbolRef, RangeSet>
+RangeConstraintManager::modifySymbolAndConstraints(ProgramStateRef State,
+                                                   SymbolRef Sym, RangeSet R) {
+  AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions();
+  if (!Opts.ShouldSupportSymbolicIntegerCasts ||
+      !Sym->getType()->isIntegralOrEnumerationType() || R.isEmpty())
+    return {State, Sym, R};
+
+  auto OptSymRange = getProperSymbolAndConstraint(Sym, R);
+  // If symbol is not integral, return the triple without handling.
+  if (!OptSymRange)
+    return {State, Sym, R};
+
+  Sym = OptSymRange->first;
+  R = OptSymRange->second;
+
+  State = updateExistingConstraints(State, Sym, R);
+  return {State, Sym, R};
+}
+
+/// Return a pair of a right symbol and ranges to save them in the constraint
+/// map. We should correct symbol because in case of truncation cast we can only
+/// reason about truncated bytes but not the whole value. E.g. (char)(int x),
+/// we can store constraints for the first lower byte but we still don't know
+/// the original value. Also in case of promotion or conversion we should store
+/// the original value with original ranges, instead of cast symbol, because we
+/// are not interested in any constraints of cast symbol but the original symbol
+/// in `if` expression or any bifurcation.
+/// Technically, we only handle SymbolCast here, since other types are OK
+/// and don't need additional handling.
+/// We can return:
+/// - a new symbol based on the root, in case of a truncation,
+/// - a root symbol if it is not a truncation.
+/// We also cast and return ranges to the type of a result symbol.
+///
+/// \param Sym -- a considered symbol.
+/// \param R -- a known range for the given symbol.
+Optional<std::pair<SymbolRef, RangeSet>>
+RangeConstraintManager::getProperSymbolAndConstraint(SymbolRef Sym,
+                                                     RangeSet R) {
+  // We don't need to do any extra work for non-SymbolCast's.
+  if (!isa<SymbolCast>(Sym))
+    return std::make_pair(Sym, R);
+
+  // Extract a root symbol and compare it to outer types.
+  ASTContext &C = getBasicVals().getContext();
+  SymbolRef RootSym = Sym;
+  // Get the root symbol.
+  uint32_t MinBitWidth = UINT32_MAX;
+  do {
+    // We only handle integral cast, when all the types are integrals.
+    // Return `None` in this particular case to notify user that we can not
+    // handle non-integral SymbolCast.
+    QualType T = RootSym->getType();
+    if (!T->isIntegralOrEnumerationType())
+      return None;
+    MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T));
+    RootSym = cast<SymbolCast>(RootSym)->getOperand();
+  } while (isa<SymbolCast>(RootSym));
+
+  // Check for truncation.
+  QualType RootTy = RootSym->getType();
+  uint32_t RootBitWidth = C.getIntWidth(RootTy);
+  const bool IsTruncated = (MinBitWidth < RootBitWidth);
+
+  if (IsTruncated) {
+    // Truncation occurred. High bits lost. We can't reason about ranges of
+    // the original(root) operand in this case, so we should not add it to the
+    // constraint map. Canonize Sym instead.
+    // We produce a new symbol using an NTL type equal to the smallest type of
+    // Sym. For instance:
+    // - (int)(uchar)x -> (char8)x
+    // - (long)(ushort)(short)x -> (char16)x
+
+    // Make a truncated range.
+    CanQualType Ty = *NTL.findByWidth(MinBitWidth);
+    R = F.castTo(R, Ty);
+    // Produce a new SymbolCast.
+    RootSym = getSymbolManager().getCastSymbol(RootSym, RootTy, Ty);
+  } else {
+    // Promotion or conversion occurred. No bit lost. Make a range for the root
+    // type. Cast the given range to the type of the root range.
+    R = F.castTo(R, RootTy);
+  }
+
+  return std::make_pair(RootSym, R);
+}
+
+/// Update existing constraints for all truncated SymbolCasts based on the
+/// given symbol whose types are smaller than the current one.
+/// For instance, for Sym:
+/// - {int8 x}  update nothing;
+/// - {int16 x} update (int8)x;
+/// - {int32 x} update (int8)x, (int16)x;
+/// - {int64 x} update (int8)x, (int16)x, (int32)x.
+///
+/// FIXME: Update bigger casts. We only can reason about ranges of smaller
+/// types, because it would be too complicated to update, say, the entire `int`
+/// range if you only have knowledge that its lowest byte has been changed. So
+/// we don't touch bigger casts and they may be potentially invalid.
+/// For future, for:
+/// - {int8 x}  update (int16)x, (int32)x, (int64)x;
+/// - {int16 x} update (int32)x, (int64)x;
+/// - {int32 x} update (int64)x;
+/// - {int64 x} update nothing.
+///
+/// \param State -- current program state.
+/// \param Sym -- a considered symbol.
+/// \param R -- a known range for the given symbol.
+/// \returns the state with updated constraints. State is null if the branch is
+/// infeasible.
+ProgramStateRef
+RangeConstraintManager::updateExistingConstraints(ProgramStateRef State,
+                                                  SymbolRef Sym, RangeSet R) {
+  unsigned SymBitWidth =
+      getBasicVals().getContext().getIntWidth(Sym->getType());
+  // Get a root symbol in case of SymbolCast.
+  Sym = Sym->ignoreCasts();
+  auto SmallerNTLTypes =
+      llvm::make_range(NTL.begin(), NTL.findByWidth(SymBitWidth));
+  for (const QualType T : SmallerNTLTypes) {
+    // Use NTL type to create canonical SymbolCast to find an existing
+    // constraint.
+    SymbolRef S =
+        State->getSymbolManager().getCastSymbol(Sym, Sym->getType(), T);
+    // If such constraint is found, update it by intersecting.
+    if (const RangeSet *RS = getConstraint(State, S)) {
+      RangeSet TruncR = F.castTo(R, T);
+      TruncR = F.intersect(*RS, TruncR);
+      // If intersection is empty, then the branch is infeasible.
+      if (TruncR.isEmpty())
+        return nullptr;
+      // Update the constraint.
+      State = setConstraint(State, S, TruncR);
+    }
+  }
+
+  return State;
+}
+
 ProgramStateRef
 RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
                                     const llvm::APSInt &Int,
@@ -2588,6 +2870,9 @@
   RangeSet New = getRange(St, Sym);
   New = F.deletePoint(New, Point);
 
+  std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New);
+  if (!St)
+    return nullptr;
   return trackNE(New, St, Sym, Int, Adjustment);
 }
 
@@ -2605,6 +2890,9 @@
   RangeSet New = getRange(St, Sym);
   New = F.intersect(New, AdjInt);
 
+  std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New);
+  if (!St)
+    return nullptr;
   return trackEQ(New, St, Sym, Int, Adjustment);
 }
 
@@ -2642,6 +2930,10 @@
                                     const llvm::APSInt &Int,
                                     const llvm::APSInt &Adjustment) {
   RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
+
+  std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New);
+  if (!St)
+    return nullptr;
   return trackNE(New, St, Sym, Int, Adjustment);
 }
 
@@ -2679,6 +2971,10 @@
                                     const llvm::APSInt &Int,
                                     const llvm::APSInt &Adjustment) {
   RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
+
+  std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New);
+  if (!St)
+    return nullptr;
   return trackNE(New, St, Sym, Int, Adjustment);
 }
 
@@ -2716,7 +3012,14 @@
                                     const llvm::APSInt &Int,
                                     const llvm::APSInt &Adjustment) {
   RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
-  return New.isEmpty() ? nullptr : setConstraint(St, Sym, New);
+
+  if (New.isEmpty())
+    return nullptr;
+
+  std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New);
+  if (!St)
+    return nullptr;
+  return setConstraint(St, Sym, New);
 }
 
 RangeSet
@@ -2760,7 +3063,14 @@
                                     const llvm::APSInt &Int,
                                     const llvm::APSInt &Adjustment) {
   RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
-  return New.isEmpty() ? nullptr : setConstraint(St, Sym, New);
+
+  if (New.isEmpty())
+    return nullptr;
+
+  std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New);
+  if (!St)
+    return nullptr;
+  return setConstraint(St, Sym, New);
 }
 
 ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
Index: clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
@@ -418,6 +418,12 @@
 
   ProgramStateRef State = C.getState();
 
+  // Unwrap the symbolic expression to skip argument casts on a function call.
+  // This is useful in C, where functions cannot be overloaded, but we still
+  // need to pass arguments of different types, so an implicit cast
+  // occurs.
+  Sym = Sym->ignoreCasts();
+
   C.addTransition(C.getState()->set<DenotedSymbols>(Sym, E));
 }
 
Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
+++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
@@ -262,7 +262,7 @@
 
 /// Represents a cast expression.
 class SymbolCast : public SymExpr {
-  const SymExpr *Operand;
+  SymbolRef Operand;
 
   /// Type of the operand.
   QualType FromTy;
@@ -271,7 +271,7 @@
   QualType ToTy;
 
 public:
-  SymbolCast(const SymExpr *In, QualType From, QualType To)
+  SymbolCast(SymbolRef In, QualType From, QualType To)
       : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
     assert(In);
     assert(isValidTypeForSymbol(From));
@@ -287,12 +287,19 @@
 
   QualType getType() const override { return ToTy; }
 
-  const SymExpr *getOperand() const { return Operand; }
+  SymbolRef getOperand() const { return Operand; }
+
+  SymbolRef ignoreCasts() const override {
+    SymbolRef Sym = Operand;
+    while (isa<SymbolCast>(Sym))
+      Sym = cast<SymbolCast>(Sym)->Operand;
+    return Sym;
+  }
 
   void dumpToStream(raw_ostream &os) const override;
 
-  static void Profile(llvm::FoldingSetNodeID& ID,
-                      const SymExpr *In, QualType From, QualType To) {
+  static void Profile(llvm::FoldingSetNodeID &ID, SymbolRef In, QualType From,
+                      QualType To) {
     ID.AddInteger((unsigned) SymbolCastKind);
     ID.AddPointer(In);
     ID.Add(From);
@@ -304,9 +311,7 @@
   }
 
   // Implement isa<T> support.
-  static bool classof(const SymExpr *SE) {
-    return SE->getKind() == SymbolCastKind;
-  }
+  static bool classof(SymbolRef SE) { return SE->getKind() == SymbolCastKind; }
 };
 
 /// Represents a symbolic expression involving a binary operator
Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
+++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
@@ -62,6 +62,8 @@
   virtual QualType getType() const = 0;
   virtual void Profile(llvm::FoldingSetNodeID &profile) = 0;
 
+  virtual const SymExpr *ignoreCasts() const { return this; }
+
   /// Iterator over symbols that the current symbol depends on.
   ///
   /// For SymbolData, it's the symbol itself; for expressions, it's the
Index: clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
+++ clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
@@ -135,8 +135,9 @@
            " (" + Visit(S->getRHS()) + ")";
   }
 
-  // TODO: SymbolCast doesn't appear in practice.
-  // Add the relevant code once it does.
+  std::string VisitSymbolCast(const SymbolCast *S) {
+    return "(" + S->getType().getAsString() + ")" + Visit(S->getOperand());
+  }
 
   std::string VisitSymbolicRegion(const SymbolicRegion *R) {
     // Explain 'this' object here.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to