ASDenysPetrov updated this revision to Diff 425248.
ASDenysPetrov marked 4 inline comments as done.
ASDenysPetrov edited the summary of this revision.
ASDenysPetrov added a comment.

@martong thank you for the idea. I've tried to implement it. Could you look at 
the patch once again, please?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103096/new/

https://reviews.llvm.org/D103096

Files:
  clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
  clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
  clang/test/Analysis/symbol-integral-cast.cpp

Index: clang/test/Analysis/symbol-integral-cast.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/symbol-integral-cast.cpp
@@ -0,0 +1,374 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -analyzer-config eagerly-assume=false -analyzer-config support-symbolic-integer-casts=true -verify %s
+
+template <typename T>
+void clang_analyzer_eval(T);
+void clang_analyzer_warnIfReached();
+
+typedef short int16_t;
+typedef int int32_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+
+void test1(int x) {
+  // Even if the two lower bytes of `x` are equal to zero, it doesn't mean that
+  // the entire `x` is zero. We are not able to know the exact value of `x`.
+  // It can be one of 65536 possible values like [0, 65536, 131072, ...]
+  // and so on. To avoid huge range sets we still assume `x` in the range
+  // [INT_MIN, INT_MAX].
+  if (!(short)x) {
+    if (!x)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test2(int x) {
+  // If the two lower bytes of `x` are equal to zero, `x` can not be 65537,
+  // which does not truncate to zero as short. Thus the branch is infeasible.
+  short s = x;
+  if (!s) {
+    if (x == 65537)
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test3(int x, short s) {
+  s = x;
+  if ((short)x > -10 && s < 10) {
+    if (x > 0 && x < 10) {
+      // If the range of the whole variable was constrained then reason again
+      // about truncated bytes to make the ranges more precise.
+      clang_analyzer_eval((short)x <= 0); // expected-warning {{FALSE}}
+    }
+  }
+}
+
+void test4(unsigned x) {
+  if ((char)x > 8) {
+    // Constrain the range of the lowest byte of `x` to [9, CHAR_MAX].
+    // The original range of `x` still remains [0, UINT_MAX].
+    clang_analyzer_eval((char)x < 42); // expected-warning {{UNKNOWN}}
+    if (x < 42) {
+      // Constrain the original range to [0, 41] and update (re-constrain)
+      // the range of the lowest byte of `x` to [9, 41].
+      clang_analyzer_eval((char)x < 42); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test5(unsigned x) {
+  if ((char)x > -10 && (char)x < 10) {
+    if ((short)x == 8) {
+      // If the range of higher bytes(short) was constrained then reason again
+      // about smaller truncated ranges(char) to make it more precise.
+      clang_analyzer_eval((char)x == 8);  // expected-warning {{TRUE}}
+      clang_analyzer_eval((short)x == 8); // expected-warning {{TRUE}}
+      // We still assume full version of `x` in the range [0, UINT_MAX].
+      clang_analyzer_eval(x == 8); // expected-warning {{UNKNOWN}}
+    }
+  }
+}
+
+void test6(int x) {
+  // Even if the two lower bytes of `x` are less than zero, it doesn't mean
+  // that `x` can't be greater than zero. Hence we don't change the native
+  // range of `x` and this branch is feasible.
+  if (x > 0)
+    if ((short)x < 0)
+      clang_analyzer_eval(x > 0); // expected-warning {{TRUE}}
+}
+
+void test7(int x) {
+  // The range of two lower bytes of `x` [1, SHORT_MAX] is enough to cover
+  // all possible values of char [CHAR_MIN, CHAR_MAX]. So the lowest byte
+  // can be lower than zero.
+  if ((short)x > 0) {
+    if ((char)x < 0)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test8(int x) {
+  // Promotion from `signed int` to `signed long long` also allows reasoning
+  // about the original range, because we know that even after promotion the
+  // value remains in the range [INT_MIN, INT_MAX].
+  if ((long long)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{TRUE}}
+}
+
+void test9(signed int x) {
+  // Any cast `signed` to `unsigned` produces an unsigned range, which is
+  // [0, UNSIGNED_MAX] and can not be lower than zero.
+  if ((unsigned long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned int)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned short)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned char)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+}
+
+void test10(unsigned int x, signed char sc) {
+  // Promotion from `unsigned` to `signed` produces a signed range,
+  // which is able to cover all the values of the original,
+  // so that such cast is not lower than zero.
+  if ((signed long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  // Any other cast(conversion or truncation) from `unsigned` to `signed`
+  // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]
+  // and can be lower than zero.
+  if ((signed int)x < 0)            // explicit cast
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  signed short ss = x; // initialization
+  if (ss < 0)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  sc = x; // assignment
+  if (sc < 0)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+}
+
+void test11(unsigned int x) {
+  // Promotion from `unsigned` to `signed` entirely covers the original range.
+  // Hence such a cast is not lower than zero and the `true` branch is
+  // infeasible. But it doesn't affect the original range, which still remains
+  // as [0, UNSIGNED_MAX].
+  if ((signed long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  // Any other cast (conversion or truncation) from `unsigned` to `signed`
+  // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]. But it doesn't
+  // affect the original range, which still remains as [0, UNSIGNED_MAX].
+  if ((signed int)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  if ((signed short)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  if ((signed char)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+}
+
+void test12(int x, char c) {
+  if (x >= 5308) {
+    if (x <= 5419) {
+      // Truncation on assignment: int[5308, 5419] -> char[-68, 43]
+      c = x;
+      clang_analyzer_eval(-68 <= c && c <= 43); // expected-warning {{TRUE}}
+
+      if (c < 50)
+        clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+      else
+        clang_analyzer_warnIfReached(); // no-warning
+
+      // Truncation on initialization: int[5308, 5419] -> char[-68, 43]
+      char c1 = x;
+      clang_analyzer_eval(-68 <= c1 && c1 <= 43); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test13(int x) {
+  if (x > 913440767 && x < 913440769) { // 0x36720000
+
+    if ((short)x)                     // Truncation: int[913440768] -> short[0]
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+    if ((short)x != 0)
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test14(int x) {
+  if (x >= -1569193983 && x <= 578290016) {
+    // The big range of `x` covers all possible values of short.
+    // Truncation: int[-1569193983, 578290016] -> short[-32768, 32767]
+    if ((short)x > 0) {
+      clang_analyzer_eval(-1569193983 <= x && x <= 578290016); // expected-warning {{TRUE}}
+      short s = x;
+      clang_analyzer_eval(-32768 <= s && s <= 32767); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test15(int x) {
+  if (x >= -1569193983 && x <= -1569193871) { // [0xA2780001, 0xA2780071]
+    // The small range of `x` covers only several values of short.
+    // Truncation: int[-1569193983, -1569193871] -> short[1, 113]
+    if ((short)x)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+
+    if ((short)x > 0)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+
+    if ((short)x < 114)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+  }
+}
+
+void test16(char x) {
+  if (x < 0)
+    clang_analyzer_eval(-128 <= x && x < 0); // expected-warning {{TRUE}}
+  else
+    clang_analyzer_eval(0 <= x && x <= 127); // expected-warning {{TRUE}}
+}
+
+void test17(char x) {
+  if (-11 <= x && x <= -10) {
+    unsigned u = x;
+    // Conversion: char[-11, -10] -> unsigned int[4294967285, 4294967286]
+    clang_analyzer_eval(4294967285 <= u && u <= 4294967286); // expected-warning {{TRUE}}
+    unsigned short us = x;
+    // Conversion: char[-11, -10] -> unsigned short[65525, 65526]
+    clang_analyzer_eval(65525 <= us && us <= 65526); // expected-warning {{TRUE}}
+    unsigned char uc = x;
+    // Conversion: char[-11, -10] -> unsigned char[245, 246]
+    clang_analyzer_eval(245 <= uc && uc <= 246); // expected-warning {{TRUE}}
+  }
+}
+
+void test18(char c, short s, int i) {
+  // Any char value is always less than 1000.
+  int OneThousand = 1000;
+  c = i;
+  if (c < OneThousand)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // no-warning
+
+  // Any short value is always greater than -40000.
+  int MinusFourtyThousands = -40000;
+  s = i;
+  if (s > MinusFourtyThousands)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // no-warning
+}
+
+void test19(char x, short y) {
+  if (-43 <= x && x <= -42) { // x[-43, -42]
+    y = 42;
+    clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+
+    clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+  }
+}
+
+void test20(char x, short y) {
+  if (42 <= y && y <= 43) { // y[42, 43]
+    x = -42;
+    clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+
+    clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+  }
+}
+
+void test21(unsigned x) {
+  if (x > 42) {
+    // Unsigned range can generate two signed ranges.
+    // Conversion: unsigned[43, 4294967295] -> int[-2147483648, -1]U[43, 2147483647]
+    int i = x;                             // initialization
+    clang_analyzer_eval(-1 < i && i < 43); // expected-warning {{FALSE}}
+  }
+}
+
+void test22(int x, unsigned u) {
+  if (x > -42) {
+    // Signed range can generate two unsigned ranges.
+    // Conversion: int[-41, 2147483647] -> unsigned[0, 2147483647]U[4294967255, 4294967295]
+    u = x;                                                 // assignment
+    clang_analyzer_eval(2147483647 < u && u < 4294967255); // expected-warning {{FALSE}}
+  }
+}
+
+// PR51036
+void test23(signed char c) {
+  if ((unsigned int)c <= 200) {
+    // Conversion: char[0, 127] -> unsigned int[0, 127]
+    clang_analyzer_eval(0 <= c && c <= 127); // expected-warning {{TRUE}}
+  }
+}
+
+void test24(int x, int y) {
+  if (x == y) {
+    short s = x;
+    if (!s) {
+      if (y == 65537)
+        // FIXME: This should not warn. Support EquivalenceClasses.
+        clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+      else
+        clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    }
+  }
+}
Index: clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -543,3 +543,10 @@
 
   return VarContext->isParentOf(CurrentContext);
 }
+
+SymbolRef SymExpr::ignoreCasts() const {
+  SymbolRef Sym = this;
+  while (isa<SymbolCast>(Sym))
+    Sym = cast<SymbolCast>(Sym)->getOperand();
+  return Sym;
+}
Index: clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -568,8 +568,11 @@
       // We only handle LHS as simple symbols or SymIntExprs.
       SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
 
+      // Unwrap SymbolCast trying to find SymIntExpr inside.
+      SymbolRef S = Sym->ignoreCasts();
+
       // LHS is a symbolic expression.
-      if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
+      if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(S)) {
 
         // Is this a logical not? (!x is represented as x == 0.)
         if (op == BO_EQ && rhs.isZeroConstant()) {
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -13,6 +13,7 @@
 
 #include "clang/Basic/JsonSupport.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
@@ -20,8 +21,8 @@
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -885,6 +886,9 @@
 REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
 REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
 
+REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(CastMap, uint32_t /*bitwidth*/, RangeSet)
+REGISTER_MAP_WITH_PROGRAMSTATE(SymCastMap, SymbolRef, CastMap)
+
 namespace {
 /// This class encapsulates a set of symbols equal to each other.
 ///
@@ -1212,12 +1216,89 @@
     return Inferrer.infer(Origin);
   }
 
-  RangeSet VisitSymExpr(SymbolRef Sym) {
-    // If we got to this function, the actual type of the symbolic
-    // expression is not supported for advanced inference.
-    // In this case, we simply backoff to the default "let's simply
-    // infer the range from the expression's type".
-    return infer(Sym->getType());
+  RangeSet VisitSymExpr(SymbolRef Sym) { return InferIntegralCasts(Sym); }
+
+  // Most likely there can be at most 3 levels of integer casts.
+  using NestedTypesOfSymbolCast = SmallVector<QualType, 3>;
+  // SymbolRef - root symbol;
+  // QualType - minimal QualType among the nested casts;
+  // uint32_t - bitwidth of the minimal QualType;
+  // NestedTypesOfSymbolCast - list of casted types.
+  //   E.g. (int)(short)(char x) -> vector{int, short, char}.
+  // bool - whether parsing succeeded.
+  using SymbolCastParsedStruct =
+      std::tuple<SymbolRef, QualType, uint32_t, NestedTypesOfSymbolCast, bool>;
+
+  SymbolCastParsedStruct parseSymbolCast(SymbolRef Sym) {
+    SymbolRef RootSym = Sym;
+    QualType MinTy;
+    uint32_t MinBitWidth = UINT32_MAX;
+    NestedTypesOfSymbolCast QualTypes;
+    bool Success = true;
+
+    ASTContext &C = ValueFactory.getContext();
+    do {
+      // We only handle integral casts, when all the types are integral.
+      // Otherwise, stop and report failure through the `Success` flag.
+      QualType Ty = RootSym->getType();
+      if (!Ty->isIntegralOrEnumerationType()) {
+        Success = false;
+        break;
+      }
+      QualTypes.push_back(Ty);
+      const uint32_t IntWidth = C.getIntWidth(Ty);
+      if (MinBitWidth > IntWidth) {
+        MinBitWidth = IntWidth;
+        MinTy = Ty;
+      }
+      if (!isa<SymbolCast>(RootSym))
+        break;
+      RootSym = cast<SymbolCast>(RootSym)->getOperand();
+    } while (true);
+
+    return {RootSym, MinTy, MinBitWidth, QualTypes, Success};
+  }
+
+  RangeSet InferIntegralCasts(SymbolRef Sym) {
+    QualType T = Sym->getType();
+    AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions();
+    if (!Opts.ShouldSupportSymbolicIntegerCasts)
+      return infer(T);
+
+    // We only handle SymbolCast and single integers.
+    // Consider a cast symbol (int)(char)(short x); parsing it gives the
+    // following results:
+    NestedTypesOfSymbolCast Types; // {int, char, short}
+    SymbolRef RootSym;             // short x
+    QualType MinTy;                // char
+    uint32_t MinBitWidth;          // 8
+    bool Success;                  // true
+    std::tie(RootSym, MinTy, MinBitWidth, Types, Success) =
+        parseSymbolCast(Sym);
+    // If the flag is false, then we faced a non-integer symbol or a
+    // non-integer cast.
+    if (!Success)
+      return infer(T);
+
+    // Start from the range of the smallest type (`char`), because any `short`
+    // or `int` value will fit into `char` after the chain of casts.
+    RangeSet RS = infer(MinTy);
+    // Find corresponding map for `short x` as a root symbol.
+    if (const CastMap *CM = State->get<SymCastMap>(RootSym)) {
+      // Find the first pair whose bitwidth is greater than or equal to 8.
+      auto It = llvm::find_if(*CM, [MinBitWidth](CastMap::value_type &Item) {
+        return Item.first >= MinBitWidth;
+      });
+      if (It != CM->end())
+        RS = It.getData();
+    }
+    // Sequentially cast the range across the chain of types starting from the
+    // innermost one (short -> char -> int).
+    auto TypesReversed = llvm::make_range(Types.rbegin(), Types.rend());
+    for (const QualType T : TypesReversed)
+      RS = RangeFactory.castTo(RS, T);
+
+    return RS;
+  }
 
   RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
@@ -1880,7 +1961,7 @@
 /// Derived class can control which types we handle by defining methods of the
 /// following form:
 ///
-///   bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
+///   bool assign${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
 ///                                       CONSTRAINT Constraint);
 ///
 /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
@@ -1997,7 +2078,7 @@
     return true;
   }
 
-  inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
+  inline bool assignSymExprToRangeSet(const SymExpr *Sym, RangeSet Constraint);
   inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
                                          RangeSet Constraint) {
     return handleRemainderOp(Sym, Constraint);
@@ -2076,8 +2157,8 @@
   LLVM_NODISCARD Optional<bool> interpreteAsBool(RangeSet Constraint) {
     assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
 
-    if (Constraint.getConcreteValue())
-      return !Constraint.getConcreteValue()->isZero();
+    if (const llvm::APSInt *Int = Constraint.getConcreteValue())
+      return !Int->isZero();
 
     if (!Constraint.containsZero())
       return true;
@@ -2085,14 +2166,30 @@
     return llvm::None;
   }
 
+  using SymbolCastParsedStruct = std::tuple<SymbolRef, uint32_t, bool>;
+  SymbolCastParsedStruct parseSymbolCast(SymbolRef Sym);
+  bool updateExistingConstraints(SymbolRef Sym, RangeSet R);
+
   ProgramStateRef State;
   SValBuilder &Builder;
   RangeSet::Factory &RangeFactory;
 };
 
+//===----------------------------------------------------------------------===//
+//                  ConstraintAssignor implementation details
+//===----------------------------------------------------------------------===//
+
+bool ConstraintAssignor::assignSymExprToRangeSet(const SymExpr *Sym,
+                                                 RangeSet Constraint) {
+  AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions();
+  if (Opts.ShouldSupportSymbolicIntegerCasts)
+    return updateExistingConstraints(Sym, Constraint);
+
+  // Next assignments is based on the fact that Constraint is a concrete value.
+  // Make sure of this.
+  if (!Constraint.getConcreteValue())
+    return true;
 
-bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
-                                              const llvm::APSInt &Constraint) {
   llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
   // Iterate over all equivalence classes and try to simplify them.
   ClassMembersTy Members = State->get<ClassMembers>();
@@ -2132,6 +2229,103 @@
   return true;
 }
 
+// SymbolRef - root symbol;
+// uint32_t - minimal bitwidth among the nested casts;
+// bool - whether parsing succeeded.
+ConstraintAssignor::SymbolCastParsedStruct
+ConstraintAssignor::parseSymbolCast(SymbolRef Sym) {
+  ASTContext &C = Builder.getContext();
+  SymbolRef RootSym = Sym;
+  uint32_t MinBitWidth = UINT32_MAX;
+  bool Success = true;
+  do {
+    QualType Ty = RootSym->getType();
+    if (!Ty->isIntegralOrEnumerationType()) {
+      Success = false;
+      break;
+    }
+    MinBitWidth = std::min(MinBitWidth, C.getIntWidth(Ty));
+    if (!isa<SymbolCast>(RootSym))
+      break;
+    RootSym = cast<SymbolCast>(RootSym)->getOperand();
+  } while (true);
+
+  return {RootSym, MinBitWidth, Success};
+}
+
+/// Update the map of constraints for the current and lower bitwidths.
+/// For example:
+/// - (int8)x updates (int8)x;
+/// - (int32)x updates (int32)x, (int16)x, (int8)x;
+///
+/// FIXME: Update bigger bitwidths. We can only reason about ranges of smaller
+/// bitwidths, because it would be too complicated to update, say, the entire
+/// `int32` range if you only have knowledge that its lowest byte has been
+/// changed. So we don't touch bigger bitwidths and they may be potentially
+/// invalid. In the future:
+/// - (int8)x updates (int8)x, (int16)x, (int32)x, (int64)x;
+/// - (int32)x updates (int32)x, (int64)x;
+///
+/// \param Sym -- a considered symbol.
+/// \param R -- a new range for the given symbol.
+/// \return -- `true` if `Sym` is not the expected one, `false` if `Sym` has
+///  been handled (constraints updated, or the state found infeasible).
+/// \note: the function may set `State` to `nullptr`.
+bool ConstraintAssignor::updateExistingConstraints(SymbolRef Sym, RangeSet R) {
+  // We only handle SymbolCast and single integers.
+  // Consider a cast symbol (int)(char)(short x); parsing it gives the
+  // following results:
+  SymbolRef RootSym;    // short x
+  uint32_t MinBitWidth; // 8
+  bool Success;         // true
+  std::tie(RootSym, MinBitWidth, Success) = parseSymbolCast(Sym);
+  // If the flag is false, we faced a non-integer symbol or non-integer cast.
+  if (!Success)
+    return true;
+
+  // Update an existing map or create a new one.
+  CastMap::Factory &CMF = State->get_context<CastMap>();
+  const CastMap *CM = State->get<SymCastMap>(RootSym);
+  CastMap NewCM = CM ? *CM : CMF.getEmptyMap();
+
+  // The signedness set to APSIntType does not matter. We just need to choose
+  // one and use it to make intersection simpler while updating.
+  auto CastTo = [&](const uint32_t bitwidth) {
+    return RangeFactory.castTo(R, APSIntType(bitwidth, true));
+  };
+
+  // Update constraints in the map whose bitwidth is lower than or equal to
+  // `MinBitWidth`.
+  if (CM) {
+    for (auto &Item : *CM) {
+      // Stop after reaching a bigger bitwidth.
+      if (Item.first > MinBitWidth)
+        break;
+      RangeSet RS = RangeFactory.intersect(Item.second, CastTo(Item.first));
+      // If the intersection is empty, then the branch is infeasible.
+      if (RS.isEmpty()) {
+        State = nullptr;
+        return false;
+      }
+      NewCM = CMF.add(NewCM, Item.first, RS);
+    }
+  }
+
+  // Add a new constraint for `MinBitWidth` if it does not exist in the map.
+  if (!CM || !CM->contains(MinBitWidth)) {
+    // LLVM-IR has the ability to represent integers with a bitwidth from 1
+    // all the way to 16'777'215. See
+    // https://blog.llvm.org/2020/04/the-new-clang-extint-feature-provides.html
+    assert(MinBitWidth >= 1 && MinBitWidth <= 16'777'215 &&
+           "LLVM-IR does not support such a bitwidth");
+    NewCM = CMF.add(NewCM, MinBitWidth, CastTo(MinBitWidth));
+  }
+
+  // Replace the map for the root symbol with the updated one.
+  State = State->set<SymCastMap>(RootSym, NewCM);
+  return false;
+}
+
 bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
                                                     RangeSet Constraint) {
   if (!handleRemainderOp(Sym, Constraint))
@@ -3181,6 +3375,14 @@
     }
   }
 
+  SymCastMapTy SCM = State->get<SymCastMap>();
+  for (auto &CM : SCM) {
+    for (auto &P : CM.second) {
+      std::string Sym = toString(CM.first) + ":" + std::to_string(P.first);
+      OrderedConstraints.insert({Sym, P.second});
+    }
+  }
+
   ++Space;
   Out << '[' << NL;
   bool First = true;
Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
+++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
@@ -62,6 +62,8 @@
   virtual QualType getType() const = 0;
   virtual void Profile(llvm::FoldingSetNodeID &profile) = 0;
 
+  const SymExpr *ignoreCasts() const;
+
   /// Iterator over symbols that the current symbol depends on.
   ///
   /// For SymbolData, it's the symbol itself; for expressions, it's the
Index: clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
+++ clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
@@ -135,8 +135,9 @@
            " (" + Visit(S->getRHS()) + ")";
   }
 
-  // TODO: SymbolCast doesn't appear in practice.
-  // Add the relevant code once it does.
+  std::string VisitSymbolCast(const SymbolCast *S) {
+    return "(" + S->getType().getAsString() + ")" + Visit(S->getOperand());
+  }
 
   std::string VisitSymbolicRegion(const SymbolicRegion *R) {
     // Explain 'this' object here.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to