ASDenysPetrov updated this revision to Diff 388552.
ASDenysPetrov added a comment.

Rebased.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103096/new/

https://reviews.llvm.org/D103096

Files:
  clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
  clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
  clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
  clang/test/Analysis/symbol-integral-cast.cpp

Index: clang/test/Analysis/symbol-integral-cast.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/symbol-integral-cast.cpp
@@ -0,0 +1,374 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -analyzer-config eagerly-assume=false -analyzer-config support-symbolic-integer-casts=true -verify %s
+
+template <typename T>
+void clang_analyzer_eval(T);
+void clang_analyzer_warnIfReached();
+
+typedef short int16_t;
+typedef int int32_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+
+void test1(int x) {
+  // Even if the two lower bytes of `x` are zero, it doesn't mean that the
+  // entire `x` is zero. We are not able to know the exact value of x.
+  // It can be one of 65536 possible values like [0, 65536, 131072, ...]
+  // and so on. To avoid huge range sets we still assume `x` in the range
+  // [INT_MIN, INT_MAX].
+  if (!(short)x) {
+    if (!x)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test2(int x) {
+  // If the two lower bytes of `x` are zero and we know x to be 65537, which
+  // does not truncate to a zero short, then the branch is infeasible.
+  short s = x;
+  if (!s) {
+    if (x == 65537)
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test3(int x, short s) {
+  s = x;
+  if ((short)x > -10 && s < 10) {
+    if (x > 0 && x < 10) {
+      // If the range of the whole variable was constrained then reason again
+      // about truncated bytes to make the ranges more precise.
+      clang_analyzer_eval((short)x <= 0); // expected-warning {{FALSE}}
+    }
+  }
+}
+
+void test4(unsigned x) {
+  if ((char)x > 8) {
+    // Constrain the range of the lowest byte of `x` to [9, CHAR_MAX].
+    // The original range of `x` still remains [0, UINT_MAX].
+    clang_analyzer_eval((char)x < 42); // expected-warning {{UNKNOWN}}
+    if (x < 42) {
+      // Constrain the original range to [0, 41] and update (re-constrain)
+      // the range of the lowest byte of 'x' to [9, 41].
+      clang_analyzer_eval((char)x < 42); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test5(unsigned x) {
+  if ((char)x > -10 && (char)x < 10) {
+    if ((short)x == 8) {
+      // If the range of higher bytes(short) was constrained then reason again
+      // about smaller truncated ranges(char) to make it more precise.
+      clang_analyzer_eval((char)x == 8);  // expected-warning {{TRUE}}
+      clang_analyzer_eval((short)x == 8); // expected-warning {{TRUE}}
+      // We still assume the full version of `x` in the range [0, UINT_MAX].
+      clang_analyzer_eval(x == 8); // expected-warning {{UNKNOWN}}
+    }
+  }
+}
+
+void test6(int x) {
+  // Even if the two lower bytes of `x` are less than zero, it doesn't mean
+  // that `x` can't be greater than zero. Hence we don't change the native
+  // range of `x` and this branch is feasible.
+  if (x > 0)
+    if ((short)x < 0)
+      clang_analyzer_eval(x > 0); // expected-warning {{TRUE}}
+}
+
+void test7(int x) {
+  // The range of two lower bytes of `x` [1, SHORT_MAX] is enough to cover
+  // all possible values of char [CHAR_MIN, CHAR_MAX]. So the lowest byte
+  // can be lower than zero.
+  if ((short)x > 0) {
+    if ((char)x < 0)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test8(int x) {
+  // Promotion from `signed int` to `signed long long` also lets us reason
+  // about the original range, because we know that even after promotion the
+  // value remains in the range [INT_MIN, INT_MAX].
+  if ((long long)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{TRUE}}
+}
+
+void test9(signed int x) {
+  // Any cast `signed` to `unsigned` produces an unsigned range, which is
+  // [0, UNSIGNED_MAX] and can not be lower than zero.
+  if ((unsigned long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned int)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned short)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned char)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+}
+
+void test10(unsigned int x, signed char sc) {
+  // Promotion from `unsigned` to `signed` produces a signed range,
+  // which is able to cover all the values of the original,
+  // so that such cast is not lower than zero.
+  if ((signed long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  // Any other cast(conversion or truncation) from `unsigned` to `signed`
+  // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]
+  // and can be lower than zero.
+  if ((signed int)x < 0)            // explicit cast
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  signed short ss = x; // initialization
+  if (ss < 0)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  sc = x; // assignment
+  if (sc < 0)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+}
+
+void test11(unsigned int x) {
+  // Promotion from 'unsigned' to 'signed' entirely covers the original range.
+  // Hence such cast is not lower than zero and the `true` branch is
+  // infeasible. But it doesn't affect the original range, which still remains
+  // as [0, UNSIGNED_MAX].
+  if ((signed long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  // Any other cast(conversion or truncation) from `unsigned` to `signed`
+  // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]. But it doesn't
+  // affect the original range, which still remains as [0, UNSIGNED_MAX].
+  if ((signed int)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  if ((signed short)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  if ((signed char)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+}
+
+void test12(int x, char c) {
+  if (x >= 5308) {
+    if (x <= 5419) {
+      // Truncation on assignment: int[5308, 5419] -> char[-68, 43]
+      c = x;
+      clang_analyzer_eval(-68 <= c && c <= 43); // expected-warning {{TRUE}}
+
+      if (c < 50)
+        clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+      else
+        clang_analyzer_warnIfReached(); // no-warning
+
+      // Truncation on initialization: int[5308, 5419] -> char[-68, 43]
+      char c1 = x;
+      clang_analyzer_eval(-68 <= c1 && c1 <= 43); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test13(int x) {
+  if (x > 913440767 && x < 913440769) { // 0x36720000
+
+    if ((short)x)                     // Truncation: int[913440768] -> short[0]
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+    if ((short)x != 0)
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test14(int x) {
+  if (x >= -1569193983 && x <= 578290016) {
+    // The big range of `x` covers all possible values of short.
+    // Truncation: int[-1569193983, 578290016] -> short[-32768, 32767]
+    if ((short)x > 0) {
+      clang_analyzer_eval(-1569193983 <= x && x <= 578290016); // expected-warning {{TRUE}}
+      short s = x;
+      clang_analyzer_eval(-32768 <= s && s <= 32767); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test15(int x) {
+  if (x >= -1569193983 && x <= -1569193871) { // [0xA2780001, 0xA2780071]
+    // The small range of `x` covers only several values of short.
+    // Truncation: int[-1569193983, -1569193871] -> short[1, 113]
+    if ((short)x)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+
+    if ((short)x > 0)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+
+    if ((short)x < 114)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+  }
+}
+
+void test16(char x) {
+  if (x < 0)
+    clang_analyzer_eval(-128 <= x && x < 0); // expected-warning {{TRUE}}
+  else
+    clang_analyzer_eval(0 <= x && x <= 127); // expected-warning {{TRUE}}
+}
+
+void test17(char x) {
+  if (-11 <= x && x <= -10) {
+    unsigned u = x;
+    // Conversion: char[-11, -10] -> unsigned int[4294967285, 4294967286]
+    clang_analyzer_eval(4294967285 <= u && u <= 4294967286); // expected-warning {{TRUE}}
+    unsigned short us = x;
+    // Conversion: char[-11, -10] -> unsigned short[65525, 65526]
+    clang_analyzer_eval(65525 <= us && us <= 65526); // expected-warning {{TRUE}}
+    unsigned char uc = x;
+    // Conversion: char[-11, -10] -> unsigned char[245, 246]
+    clang_analyzer_eval(245 <= uc && uc <= 246); // expected-warning {{TRUE}}
+  }
+}
+
+void test18(char c, short s, int i) {
+  // Any char value is always less than 1000.
+  int OneThousand = 1000;
+  c = i;
+  if (c < OneThousand)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // no-warning
+
+  // Any short value is always greater than -40000.
+  int MinusFourtyThousands = -40000;
+  s = i;
+  if (s > MinusFourtyThousands)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // no-warning
+}
+
+void test19(char x, short y) {
+  if (-43 <= x && x <= -42) { // x[-43, -42]
+    y = 42;
+    clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+
+    clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+  }
+}
+
+void test20(char x, short y) {
+  if (42 <= y && y <= 43) { // y[42, 43]
+    x = -42;
+    clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+
+    clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+  }
+}
+
+void test21(unsigned x) {
+  if (x > 42) {
+    // Unsigned range can generate two signed ranges.
+    // Conversion: unsigned[43, 4294967295] -> int[-2147483648, -1]U[43, 2147483647]
+    int i = x;                             // initialization
+    clang_analyzer_eval(-1 < i && i < 43); // expected-warning {{FALSE}}
+  }
+}
+
+void test22(int x, unsigned u) {
+  if (x > -42) {
+    // Signed range can generate two unsigned ranges.
+    // Conversion: int[-41, 2147483647] -> unsigned[0, 2147483647]U[4294967255, 4294967295]
+    u = x;                                                 // assignment
+    clang_analyzer_eval(2147483647 < u && u < 4294967255); // expected-warning {{FALSE}}
+  }
+}
+
+// PR51036
+void test23(signed char c) {
+  if ((unsigned int)c <= 200) {
+    // Conversion: char[0, 127] -> unsigned int[0, 127]
+    clang_analyzer_eval(0 <= c && c <= 127); // expected-warning {{TRUE}}
+  }
+}
+
+void test24(int x, int y) {
+  if (x == y) {
+    short s = x;
+    if (!s) {
+      if (y == 65537)
+        // FIXME: This should not warn. Support EquivalenceClasses.
+        clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+      else
+        clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    }
+  }
+}
Index: clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -543,3 +543,10 @@
 
   return VarContext->isParentOf(CurrentContext);
 }
+
+SymbolRef SymExpr::ignoreCasts() const {
+  SymbolRef Sym = this;
+  while (isa<SymbolCast>(Sym))
+    Sym = cast<SymbolCast>(Sym)->getOperand();
+  return Sym;
+}
Index: clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -532,8 +532,11 @@
       // We only handle LHS as simple symbols or SymIntExprs.
       SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
 
+      // Unwrap SymbolCast trying to find SymIntExpr inside.
+      SymbolRef S = Sym->ignoreCasts();
+
       // LHS is a symbolic expression.
-      if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
+      if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(S)) {
 
         // Is this a logical not? (!x is represented as x == 0.)
         if (op == BO_EQ && rhs.isZeroConstant()) {
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -13,6 +13,7 @@
 
 #include "clang/Basic/JsonSupport.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
@@ -20,8 +21,8 @@
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -1048,6 +1049,13 @@
   return State->set<ConstraintRange>(Class, Constraint);
 }
 
+LLVM_NODISCARD ProgramStateRef setConstraint(ProgramStateRef State,
+                                             SymbolRef Sym,
+                                             RangeSet Constraint) {
+  return State->set<ConstraintRange>(EquivalenceClass::find(State, Sym),
+                                     Constraint);
+}
+
 LLVM_NODISCARD ProgramStateRef setConstraints(ProgramStateRef State,
                                               ConstraintRangeTy Constraints) {
   return State->set<ConstraintRange>(Constraints);
@@ -1179,6 +1187,60 @@
 //                           Symbolic reasoning logic
 //===----------------------------------------------------------------------===//
 
+/// This class is used for integral symbolic casts feature as a helper instance.
+///
+/// It represents a list of integral types of different sizes going in ascending
+/// order from 1 to 8 bytes. It aggregates several functions for convenience of
+/// usage. We can iterate through the types and find a type by size (bit width).
+///
+/// We use FOUR integer types: `int8`, `int16`, `int32`, `int64`, because we
+/// only support casts between types, which are lower or equal to 64-bit width.
+///
+/// We use these types for creating SymbolCast to find constraints in the
+/// constraint map. This allows us to canonicalize the key used to store and
+/// retrieve constraints instead of searching by brute force.
+///
+/// We don't care about the type signedness. Signedness is just a way of bits
+/// representation. We just care about saving data. It's enough for us to store
+/// specific constraints for the type for a specific bit width. We never use
+/// retrieved constraint directly. We always use RangeSet::Factory::castTo to
+/// get ranges for a needed type (signed or unsigned) after retrieving.
+class NominalTypeList {
+  CanQualType Types[4];
+
+public:
+  using Iterator = CanQualType *;
+
+  void init(ASTContext &C) {
+    Types[0] = C.Char8Ty;
+    Types[1] = C.Char16Ty;
+    Types[2] = C.Char32Ty;
+    Types[3] = C.LongLongTy;
+  }
+  Iterator findByWidth(uint32_t Width) {
+    int index = 4;
+    switch (Width) {
+    case 8:
+      index = 0;
+      break;
+    case 16:
+      index = 1;
+      break;
+    case 32:
+      index = 2;
+      break;
+    case 64:
+      index = 3;
+    };
+    return Types + index;
+  }
+  Iterator begin() { return std::begin(Types); }
+  Iterator end() { return std::end(Types); }
+};
+
+// We should initialize NTL with `init` method before use.
+static NominalTypeList NTL;
+
 /// A little component aggregating all of the reasoning we have about
 /// the ranges of symbolic expressions.
 ///
@@ -1194,6 +1256,69 @@
     return Inferrer.infer(Origin);
   }
 
+  RangeSet VisitSymbolCast(const SymbolCast *Sym) {
+    AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions();
+    if (!Opts.ShouldSupportSymbolicIntegerCasts)
+      return VisitSymExpr(Sym);
+
+    // Unwrap symbol to get an underlying(root) symbol.
+    // Store every next type except the inner(original) one.
+    SmallVector<QualType, 2> Types;
+    uint32_t MinBitWidth = UINT32_MAX;
+    SymbolRef RootSym = Sym;
+    ASTContext &C = ValueFactory.getContext();
+    do {
+      // We only handle integral cast, when all the types are integrals.
+      // Otherwise, pass the given symbol to VisitSymExpr.
+      QualType T = RootSym->getType();
+      if (!T->isIntegralOrEnumerationType())
+        return VisitSymExpr(Sym);
+
+      MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T));
+      Types.push_back(T);
+      RootSym = cast<SymbolCast>(RootSym)->getOperand();
+    } while (isa<SymbolCast>(RootSym));
+
+    QualType RootTy = RootSym->getType();
+    const uint32_t RootBitWidth = C.getIntWidth(RootTy);
+
+    // Check if we have any known truncated ranges of the root symbol.
+    // Truncated ranges usually are more precise than the original one.
+    // The more truncated the range is, the more precise it should be.
+    // Example: Consider the given SymbolCast is (int8)(int64)(int16){int32 x}.
+    // `int8` is the smallest type. Then the range will fit in it.
+    // Traverse through NTL types that are smaller than the root type:
+    // [int8, int32).
+    const RangeSet *RSPtr = nullptr;
+    auto It = NTL.findByWidth(MinBitWidth);
+    auto E = NTL.findByWidth(RootBitWidth);
+    for (; !RSPtr && It < E; ++It) {
+      // Produce canonical symbols with the nominal type.
+      SymbolRef S =
+          State->getSymbolManager().getCastSymbol(RootSym, RootTy, *It);
+      // Find the first constraint and exit the loop.
+      RSPtr = getConstraint(State, S);
+    }
+    // If we didn't find any truncated ranges, look for the constraint for
+    // the root type.
+    // Example (cont.): Use the root symbol `{int32 x}`.
+    if (!RSPtr)
+      RSPtr = getConstraint(State, RootSym);
+    // If there's no existing range, create it based on the root type.
+    // Example (cont.): Make range based on `int32`.
+    RangeSet RS = RSPtr ? *RSPtr : infer(RootTy);
+
+    // Cast the range to the cast types from inner to outer one by one.
+    // Example (cont.): Go through 3 types from `int16` to `int8`.
+    auto TypesReversedRange = llvm::make_range(Types.rbegin(), Types.rend());
+    for (const QualType T : TypesReversedRange)
+      RS = RangeFactory.castTo(RS, T);
+
+    // Finally we got a range of Sym->getType() type.
+    // Example (cont.): Type of range is `int8`.
+    return RS;
+  }
+
   RangeSet VisitSymExpr(SymbolRef Sym) {
     // If we got to this function, the actual type of the symbolic
     // expression is not supported for advanced inference.
@@ -1751,7 +1876,9 @@
 class RangeConstraintManager : public RangedConstraintManager {
 public:
   RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
-      : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
+      : RangedConstraintManager(EE, SVB), F(getBasicVals()) {
+    NTL.init(SVB.getContext());
+  }
 
   //===------------------------------------------------------------------===//
   // Implementation for interface from ConstraintManager.
@@ -1862,7 +1989,7 @@
 /// Derived class can control which types we handle by defining methods of the
 /// following form:
 ///
-///   bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
+///   bool assign${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
 ///                                       CONSTRAINT Constraint);
 ///
 /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
@@ -1980,12 +2107,15 @@
   }
 
   inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
+  inline bool assignSymExprToRangeSet(const SymExpr *Sym, RangeSet Constraint);
   inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
                                          RangeSet Constraint) {
     return handleRemainderOp(Sym, Constraint);
   }
   inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
                                          RangeSet Constraint);
+  inline bool assignSymbolCastToRangeSet(const SymbolCast *Sym,
+                                         RangeSet Constraint);
 
 private:
   ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
@@ -2058,8 +2188,8 @@
   LLVM_NODISCARD Optional<bool> interpreteAsBool(RangeSet Constraint) {
     assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
 
-    if (Constraint.getConcreteValue())
-      return !Constraint.getConcreteValue()->isZero();
+    if (const llvm::APSInt *Int = Constraint.getConcreteValue())
+      return !Int->isZero();
 
     if (!Constraint.containsZero())
       return true;
@@ -2067,11 +2197,192 @@
     return llvm::None;
   }
 
+  void updateExistingConstraints(SymbolRef Sym, RangeSet R);
+  SymbolRef getProperSymbol(SymbolRef Sym);
+
   ProgramStateRef State;
   SValBuilder &Builder;
   RangeSet::Factory &RangeFactory;
 };
 
+//===----------------------------------------------------------------------===//
+//                  ConstraintAssignor implementation details
+//===----------------------------------------------------------------------===//
+
+bool ConstraintAssignor::assignSymExprToRangeSet(const SymExpr *Sym,
+                                                 RangeSet Constraint) {
+  AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions();
+  if (Opts.ShouldSupportSymbolicIntegerCasts ||
+      !Sym->getType()->isIntegralOrEnumerationType()) {
+    updateExistingConstraints(Sym, Constraint);
+    if (!State)
+      return false;
+  }
+
+  // The following assignments are based on the fact that Constraint is a
+  // concrete value. Make sure of this.
+  if (!Constraint.getConcreteValue())
+    return true;
+
+  llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
+  // Iterate over all equivalence classes and try to simplify them.
+  ClassMembersTy Members = State->get<ClassMembers>();
+  for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
+    EquivalenceClass Class = ClassToSymbolSet.first;
+    State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
+    if (!State)
+      return false;
+    SimplifiedClasses.insert(Class);
+  }
+
+  // Trivial equivalence classes (those that have only one symbol member) are
+  // not stored in the State. Thus, we must skim through the constraints as
+  // well. And we try to simplify symbols in the constraints.
+  ConstraintRangeTy Constraints = State->get<ConstraintRange>();
+  for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
+    EquivalenceClass Class = ClassConstraint.first;
+    if (SimplifiedClasses.count(Class)) // Already simplified.
+      continue;
+    State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
+    if (!State)
+      return false;
+  }
+
+  return true;
+}
+
+bool ConstraintAssignor::assignSymbolCastToRangeSet(const SymbolCast *Sym,
+                                                    RangeSet R) {
+  AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions();
+  // If symbol is not integral or the option is off, we need another handler.
+  if (!Opts.ShouldSupportSymbolicIntegerCasts ||
+      !Sym->getType()->isIntegralOrEnumerationType())
+    return false;
+
+  // If range is empty, the branch is infeasible.
+  if (R.isEmpty()) {
+    State = nullptr;
+    return false;
+  }
+
+  SymbolRef S = getProperSymbol(Sym);
+  // If symbol is not integral, we need another handler.
+  if (!S)
+    return true;
+
+  R = RangeFactory.castTo(R, S->getType());
+  updateExistingConstraints(S, R);
+  State = setConstraint(State, S, R);
+
+  return false;
+}
+
+/// Return a symbol which is the best candidate to save it in the constraint
+/// map. We should correct symbol because in case of truncation cast we can
+/// only reason about truncated bytes but not the whole value. E.g. (char)(int
+/// x), we can store constraints for the first lower byte but we still don't
+/// know the root value. Also in case of promotion or conversion we should
+/// store the root value instead of cast symbol, because we can always get
+/// a correct range using `castTo` method. And we are not interested in any
+/// constraints of cast symbol but the root symbol in `if` expression
+/// or any bifurcation. We can return:
+/// - a new symbol based on the root, in case of a truncation,
+/// - a root symbol if it is not a truncation.
+///
+/// \param Sym -- a given symbol.
+/// \returns a corrected symbol based on a given one. Symbol is null if the
+/// given symbol is unsupported. We support only integral casts.
+SymbolRef ConstraintAssignor::getProperSymbol(SymbolRef Sym) {
+  // We don't need to do any extra work for non-SymbolCast's.
+  if (!isa<SymbolCast>(Sym))
+    return Sym;
+
+  // Extract a root symbol and compare it to outer types.
+  ASTContext &C = Builder.getContext();
+  SymbolRef RootSym = Sym;
+  // Get the root symbol.
+  uint32_t MinBitWidth = UINT32_MAX;
+  do {
+    // We only handle integral cast, when all the types are integrals.
+    // Return null in this particular case to notify the caller that we can not
+    // handle non-integral SymbolCast.
+    QualType T = RootSym->getType();
+    if (!T->isIntegralOrEnumerationType())
+      return nullptr;
+    MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T));
+    RootSym = cast<SymbolCast>(RootSym)->getOperand();
+  } while (isa<SymbolCast>(RootSym));
+
+  // Check for truncation.
+  QualType RootTy = RootSym->getType();
+  uint32_t RootBitWidth = C.getIntWidth(RootTy);
+
+  const bool IsTruncated = (MinBitWidth < RootBitWidth);
+  if (IsTruncated) {
+    // Truncation occurred. High bits lost. We can't reason about ranges of
+    // the original(root) operand in this case, so we should not add it to the
+    // constraint map. Canonize Sym instead.
+    // We produce a new symbol using an NTL type equal to the smallest type of
+    // Sym. For instance:
+    // - (int)(uchar)x -> (char8)x
+    // - (long)(ushort)(short)x -> (char16)x
+
+    // Produce a new SymbolCast.
+    CanQualType Ty = *NTL.findByWidth(MinBitWidth);
+    RootSym = State->getSymbolManager().getCastSymbol(RootSym, RootTy, Ty);
+  }
+
+  return RootSym;
+}
+
+/// Update existing constraints for all truncated SymbolCasts based on the
+/// given symbol whose types are smaller than the current one.
+/// For instance, for Sym:
+/// - {int8 x}  update nothing;
+/// - {int16 x} update (int8)x;
+/// - {int32 x} update (int8)x, (int16)x;
+/// - {int64 x} update (int8)x, (int16)x, (int32)x.
+///
+/// FIXME: Update bigger casts. We only can reason about ranges of smaller
+/// types, because it would be too complicated to update, say, the entire
+/// `int` range if you only have knowledge that its lowest byte has been
+/// changed. So we don't touch bigger casts and they may be potentially
+/// invalid. For future, for:
+/// - {int8 x}  update (int16)x, (int32)x, (int64)x;
+/// - {int16 x} update (int32)x, (int64)x;
+/// - {int32 x} update (int64)x;
+/// - {int64 x} update nothing.
+///
+/// \param State -- current program state.
+/// \param Sym -- a considered symbol.
+/// \param R -- a known range for the given symbol.
+/// \note: needs check of null state after use.
+void ConstraintAssignor::updateExistingConstraints(SymbolRef Sym, RangeSet R) {
+  unsigned SymBitWidth = Builder.getContext().getIntWidth(Sym->getType());
+  // Get a root symbol in case of SymbolCast.
+  Sym = Sym->ignoreCasts();
+  QualType SymTy = Sym->getType();
+  auto SmallerNTLTypes =
+      llvm::make_range(NTL.begin(), NTL.findByWidth(SymBitWidth));
+  SymbolManager &SM = State->getSymbolManager();
+  for (const QualType T : SmallerNTLTypes) {
+    // Use NTL type to create canonical SymbolCast to find an existing
+    // constraint.
+    SymbolRef S = SM.getCastSymbol(Sym, SymTy, T);
+    // If such constraint is found, update it by intersecting.
+    if (const RangeSet *RS = getConstraint(State, S)) {
+      RangeSet TruncR = RangeFactory.castTo(R, T);
+      TruncR = RangeFactory.intersect(*RS, TruncR);
+      // If intersection is empty, then the branch is infeasible.
+      if (TruncR.isEmpty()) {
+        State = nullptr;
+        break;
+      }
+      // Update the constraint.
+      State = setConstraint(State, S, TruncR);
+    }
+  }
+}
 
 bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
                                               const llvm::APSInt &Constraint) {
Index: clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
@@ -418,6 +418,12 @@
 
   ProgramStateRef State = C.getState();
 
+  // Unwrap symbolic expression to skip argument casts on function call.
+  // This is useful when there is no way for overloading function in C
+  // but we need to pass different types of arguments and
+  // implicit cast occurs.
+  Sym = Sym->ignoreCasts();
+
   C.addTransition(C.getState()->set<DenotedSymbols>(Sym, E));
 }
 
Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
+++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
@@ -62,6 +62,8 @@
   virtual QualType getType() const = 0;
   virtual void Profile(llvm::FoldingSetNodeID &profile) = 0;
 
+  const SymExpr *ignoreCasts() const;
+
   /// Iterator over symbols that the current symbol depends on.
   ///
   /// For SymbolData, it's the symbol itself; for expressions, it's the
Index: clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
+++ clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
@@ -135,8 +135,9 @@
            " (" + Visit(S->getRHS()) + ")";
   }
 
-  // TODO: SymbolCast doesn't appear in practice.
-  // Add the relevant code once it does.
+  std::string VisitSymbolCast(const SymbolCast *S) {
+    return "(" + S->getType().getAsString() + ")" + Visit(S->getOperand());
+  }
 
   std::string VisitSymbolicRegion(const SymbolicRegion *R) {
     // Explain 'this' object here.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to