ASDenysPetrov updated this revision to Diff 350579.
ASDenysPetrov added a comment.

Added a boolean option `handle-integral-cast-for-ranges` under 
`-analyzer-config` flag. Disabled the feature by default.

@Noq, @steakhal 
Do you think it's necessary to make any changes to
`SMTConstraintManager` within the scope of this patch?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103096/new/

https://reviews.llvm.org/D103096

Files:
  clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
  clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
  clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
  clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
  clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
  clang/test/Analysis/analyzer-config.c
  clang/test/Analysis/bool-assignment.c
  clang/test/Analysis/range_casts.c
  clang/test/Analysis/symbol-integral-cast.cpp

Index: clang/test/Analysis/symbol-integral-cast.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/symbol-integral-cast.cpp
@@ -0,0 +1,353 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -analyzer-config eagerly-assume=false -analyzer-config handle-integral-cast-for-ranges=true -verify %s
+
+template <typename T>
+void clang_analyzer_eval(T);
+void clang_analyzer_warnIfReached();
+
+typedef short int16_t;
+typedef int int32_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+
+void test1(int x) {
+  // Even if two lower bytes of `x` equal to zero, it doesn't mean that
+  // the entire `x` is zero. We are not able to know the exact value of x.
+  // It can be one of  65536 possible values like [0, 65536, 131072, ...]
+  // and so on. To avoid huge range sets we still assume `x` in the range
+  // [INT_MIN, INT_MAX].
+  if (!(short)x) {
+    if (!x)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test2(int x) {
+  // If the two lower bytes of `x` are zero, `x` cannot be 65537, since 65537
+  // does not truncate to a zero short. Thus the branch is infeasible.
+  short s = x;
+  if (!s) {
+    if (x == 65537)
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test3(int x, short s) {
+  s = x;
+  if ((short)x > -10 && s < 10) {
+    if (x > 0 && x < 10) {
+      // If the range of the whole variable was constrained then reason again
+      // about truncated bytes to make the ranges more precise.
+      clang_analyzer_eval((short)x <= 0); // expected-warning {{FALSE}}
+    }
+  }
+}
+
+void test4(unsigned x) {
+  if ((char)x > 8) {
+    // Constrain the range of the lowest byte of `x` to [9, CHAR_MAX].
+    // The original range of `x` still remains [0, UINT_MAX].
+    clang_analyzer_eval((char)x < 42); // expected-warning {{UNKNOWN}}
+    if (x < 42) {
+      // Constrain the original range to [0, 41] and update (re-constrain)
+      // the range of the lowest byte of 'x' to [9, 41].
+      clang_analyzer_eval((char)x < 42); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test5(unsigned x) {
+  if ((char)x > -10 && (char)x < 10) {
+    if ((short)x == 8) {
+      // If the range of higher bytes(short) was constrained then reason again
+      // about smaller truncated ranges(char) to make it more precise.
+      clang_analyzer_eval((char)x == 8);  // expected-warning {{TRUE}}
+      clang_analyzer_eval((short)x == 8); // expected-warning {{TRUE}}
+      // We still assume the full `x` to be in the range [0, UINT_MAX].
+      clang_analyzer_eval(x == 8); // expected-warning {{UNKNOWN}}
+    }
+  }
+}
+
+void test6(int x) {
+  // Even if two lower bytes of `x` less than zero, it doesn't mean that `x`
+  // can't be greater than zero. Thence we don't change the native range of
+  // `x` and this branch is feasible.
+  if (x > 0)
+    if ((short)x < 0)
+      clang_analyzer_eval(x > 0); // expected-warning {{TRUE}}
+}
+
+void test7(int x) {
+  // The range of two lower bytes of `x` [1, SHORT_MAX] is enough to cover
+  // all possible values of char [CHAR_MIN, CHAR_MAX]. So the lowest byte
+  // can be lower than zero.
+  if ((short)x > 0) {
+    if ((char)x < 0)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test8(int x) {
+  // Promotion from `signed int` to `signed long long` also reasoning about the
+  // original range, because we know the fact that even after promotion it
+  // remains in the range [INT_MIN, INT_MAX].
+  if ((long long)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{TRUE}}
+}
+
+void test9(signed int x) {
+  // Any cast `signed` to `unsigned` produces an unsigned range, which is
+  // [0, UNSIGNED_MAX] and can not be lower than zero.
+  if ((unsigned long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned int)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned short)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  if ((unsigned char)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+}
+
+void test10(unsigned int x, signed char sc) {
+  // Promotion from `unsigned` to `signed` produces a signed range,
+  // which is able to cover all the values of the original,
+  // so that such cast is not lower than zero.
+  if ((signed long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  // Any other cast(conversion or truncation) from `unsigned` to `signed`
+  // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]
+  // and can be lower than zero.
+  if ((signed int)x < 0)            // explicit cast
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  signed short ss = x; // initialization
+  if (ss < 0)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+  sc = x; // assignment
+  if (sc < 0)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+}
+
+void test11(unsigned int x) {
+  // Promotion from 'unsigned' to 'signed' entirely covers the original range.
+  // Hence such a cast is not lower than zero and the `true` branch is
+  // infeasible. But it doesn't affect the original range, which still remains
+  // as [0, UNSIGNED_MAX].
+  if ((signed long long)x < 0)
+    clang_analyzer_warnIfReached(); // no-warning
+  else
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  // Any other cast(conversion or truncation) from `unsigned` to `signed`
+  // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]. But it doesn't
+  // affect the original range, which still remains as [0, UNSIGNED_MAX].
+  if ((signed int)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  if ((signed short)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+
+  if ((signed char)x < 0)
+    clang_analyzer_eval(x < 0); // expected-warning {{FALSE}}
+}
+
+void test12(int x, char c) {
+  if (x >= 5308) {
+    if (x <= 5419) {
+      // Truncation on assignment: int[5308, 5419] -> char[-68, 43]
+      c = x;
+      clang_analyzer_eval(-68 <= c && c <= 43); // expected-warning {{TRUE}}
+
+      if (c < 50)
+        clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+      else
+        clang_analyzer_warnIfReached(); // no-warning
+
+      // Truncation on initialization: int[5308, 5419] -> char[-68, 43]
+      char c1 = x;
+      clang_analyzer_eval(-68 <= c1 && c1 <= 43); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test13(int x) {
+  if (x > 913440767 && x < 913440769) { // 0x36720000
+
+    if ((short)x)                     // Truncation: int[913440768] -> short[0]
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+    if ((short)x != 0)
+      clang_analyzer_warnIfReached(); // no-warning
+    else
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  }
+}
+
+void test14(int x) {
+  if (x >= -1569193983 && x <= 578290016) {
+    // The big range of `x` covers all possible values of short.
+    // Truncation: int[-1569193983, 578290016] -> short[-32768, 32767]
+    if ((short)x > 0) {
+      clang_analyzer_eval(-1569193983 <= x && x <= 578290016); // expected-warning {{TRUE}}
+      short s = x;
+      clang_analyzer_eval(-32768 <= s && s <= 32767); // expected-warning {{TRUE}}
+    }
+  }
+}
+
+void test15(int x) {
+  if (x >= -1569193983 && x <= -1569193871) { // [0xA2780001, 0xA2780071]
+    // The small range of `x` covers only several values of short.
+    // Truncation: int[-1569193983, -1569193871] -> short[1, 113]
+    if ((short)x)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+
+    if ((short)x > 0)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+
+    if ((short)x < 114)
+      clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+    else
+      clang_analyzer_warnIfReached(); // no-warning
+  }
+}
+
+void test16(char x) {
+  if (x < 0)
+    clang_analyzer_eval(-128 <= x && x < 0); // expected-warning {{TRUE}}
+  else
+    clang_analyzer_eval(0 <= x && x <= 127); // expected-warning {{TRUE}}
+}
+
+void test17(char x) {
+  if (-11 <= x && x <= -10) {
+    unsigned u = x;
+    // Conversion: char[-11, -10] -> unsigned int[4294967285, 4294967286]
+    clang_analyzer_eval(4294967285 <= u && u <= 4294967286); // expected-warning {{TRUE}}
+    unsigned short us = x;
+    // Conversion: char[-11, -10] -> unsigned short[65525, 65526]
+    clang_analyzer_eval(65525 <= us && us <= 65526); // expected-warning {{TRUE}}
+    unsigned char uc = x;
+    // Conversion: char[-11, -10] -> unsigned char[245, 246]
+    clang_analyzer_eval(245 <= uc && uc <= 246); // expected-warning {{TRUE}}
+  }
+}
+
+void test18(char c, short s, int i) {
+  // Any char value is always less than 1000.
+  int OneThousand = 1000;
+  c = i;
+  if (c < OneThousand)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // no-warning
+
+  // Any short value is always greater than -40000.
+  int MinusFourtyThousands = -40000;
+  s = i;
+  if (s > MinusFourtyThousands)
+    clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+  else
+    clang_analyzer_warnIfReached(); // no-warning
+}
+
+void test19(char x, short y) {
+  if (-43 <= x && x <= -42) { // x[-43, -42]
+    y = 42;
+    clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+
+    clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+  }
+}
+
+void test20(char x, short y) {
+  if (42 <= y && y <= 43) { // y[42, 43]
+    x = -42;
+    clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}}
+
+    clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}}
+    clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}}
+
+    clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}}
+    clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}}
+  }
+}
+
+void test21(unsigned x) {
+  if (x > 42) {
+    // Unsigned range can generate two signed ranges.
+    // Conversion: unsigned[43, 4294967295] -> int[-2147483648, -1]U[43, 2147483647]
+    int i = x;                             // initialization
+    clang_analyzer_eval(-1 < i && i < 43); // expected-warning {{FALSE}}
+  }
+}
+
+void test22(int x, unsigned u) {
+  if (x > -42) {
+    // Signed range can generate two unsigned ranges.
+    // Conversion: int[-41, 2147483647] -> unsigned[0, 2147483647]U[4294967255, 4294967295]
+    u = x;                                                 // assignment
+    clang_analyzer_eval(2147483647 < u && u < 4294967255); // expected-warning {{FALSE}}
+  }
+}
Index: clang/test/Analysis/range_casts.c
===================================================================
--- clang/test/Analysis/range_casts.c
+++ clang/test/Analysis/range_casts.c
@@ -1,5 +1,5 @@
 // This test checks that intersecting ranges does not cause 'system is over constrained' assertions in the case of eg: 32 bits unsigned integers getting their range from 64 bits signed integers.
-// RUN: %clang_analyze_cc1 -triple x86_64-pc-linux-gnu -analyzer-checker=core,debug.ExprInspection -analyzer-store=region -verify %s
+// RUN: %clang_analyze_cc1 -triple x86_64-pc-linux-gnu -analyzer-checker=core,debug.ExprInspection -analyzer-store=region -analyzer-config handle-integral-cast-for-ranges=true -verify %s
 
 void clang_analyzer_warnIfReached();
 
Index: clang/test/Analysis/bool-assignment.c
===================================================================
--- clang/test/Analysis/bool-assignment.c
+++ clang/test/Analysis/bool-assignment.c
@@ -46,7 +46,7 @@
 #ifdef ANALYZER_CM_Z3
     BOOL x = y; // expected-warning {{Assignment of a non-Boolean value}}
 #else
-    BOOL x = y; // no-warning
+    BOOL x = y; // expected-warning {{Assignment of a non-Boolean value}}
 #endif
     return;
   }
Index: clang/test/Analysis/analyzer-config.c
===================================================================
--- clang/test/Analysis/analyzer-config.c
+++ clang/test/Analysis/analyzer-config.c
@@ -81,6 +81,7 @@
 // CHECK-NEXT: exploration_strategy = unexplored_first_queue
 // CHECK-NEXT: faux-bodies = true
 // CHECK-NEXT: graph-trim-interval = 1000
+// CHECK-NEXT: handle-integral-cast-for-ranges = false
 // CHECK-NEXT: inline-lambdas = true
 // CHECK-NEXT: ipa = dynamic-bifurcate
 // CHECK-NEXT: ipa-always-inline-size = 3
Index: clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -536,8 +536,13 @@
       // We only handle LHS as simple symbols or SymIntExprs.
       SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
 
+      // Unwrap SymbolCast trying to find SymIntExpr inside.
+      SymbolRef S = Sym;
+      while (isa<SymbolCast>(S))
+        S = cast<SymbolCast>(S)->getOperand();
+
       // LHS is a symbolic expression.
-      if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
+      if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(S)) {
 
         // Is this a logical not? (!x is represented as x == 0.)
         if (op == BO_EQ && rhs.isZeroConstant()) {
Index: clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -500,44 +500,6 @@
   return true;
 }
 
-// Handles casts of type CK_IntegralCast.
-// At the moment, this function will redirect to evalCast, except when the range
-// of the original value is known to be greater than the max of the target type.
-SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val,
-                                   QualType castTy, QualType originalTy) {
-  // No truncations if target type is big enough.
-  if (getContext().getTypeSize(castTy) >= getContext().getTypeSize(originalTy))
-    return evalCast(val, castTy, originalTy);
-
-  SymbolRef se = val.getAsSymbol();
-  if (!se) // Let evalCast handle non symbolic expressions.
-    return evalCast(val, castTy, originalTy);
-
-  // Find the maximum value of the target type.
-  APSIntType ToType(getContext().getTypeSize(castTy),
-                    castTy->isUnsignedIntegerType());
-  llvm::APSInt ToTypeMax = ToType.getMaxValue();
-  NonLoc ToTypeMaxVal =
-      makeIntVal(ToTypeMax.isUnsigned() ? ToTypeMax.getZExtValue()
-                                        : ToTypeMax.getSExtValue(),
-                 castTy)
-          .castAs<NonLoc>();
-  // Check the range of the symbol being casted against the maximum value of the
-  // target type.
-  NonLoc FromVal = val.castAs<NonLoc>();
-  QualType CmpTy = getConditionType();
-  NonLoc CompVal =
-      evalBinOpNN(state, BO_LE, FromVal, ToTypeMaxVal, CmpTy).castAs<NonLoc>();
-  ProgramStateRef IsNotTruncated, IsTruncated;
-  std::tie(IsNotTruncated, IsTruncated) = state->assume(CompVal);
-  if (!IsNotTruncated && IsTruncated) {
-    // Symbol is truncated so we evaluate it as a cast.
-    NonLoc CastVal = makeNonLoc(se, originalTy, castTy);
-    return CastVal;
-  }
-  return evalCast(val, castTy, originalTy);
-}
-
 //===----------------------------------------------------------------------===//
 // Cast methods.
 // `evalCast` is the main method
@@ -939,15 +901,67 @@
   } else {
     // Symbol to integer, float.
     QualType T = Context.getCanonicalType(SE->getType());
-    // If types are the same or both are integers, ignore the cast.
-    // FIXME: Remove this hack when we support symbolic truncation/extension.
-    // HACK: If both castTy and T are integers, ignore the cast.  This is
-    // not a permanent solution.  Eventually we want to precisely handle
-    // extension/truncation of symbolic integers.  This prevents us from losing
-    // precision when we assign 'x = y' and 'y' is symbolic and x and y are
-    // different integer types.
-    if (haveSameType(T, CastTy))
-      return V;
+    // If castTy and T are different integers, return a cast of the given
+    // symbol. This helps RangeConstraintManager to recognize the cast and
+    // reason about new ranges. See symbolic truncation/extension/conversions
+    // handles in SymbolicRangeInferrer::VisitSymbolCast.
+    if (T->isIntegralOrEnumerationType() &&
+        CastTy->isIntegralOrEnumerationType()) {
+      auto &Opts =
+          StateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions();
+      // If types are the same or appropriate option is disabled,
+      // ignore the cast.
+      // NOTE: ShouldHandleIntegralCastForRanges is `false` by default.
+      if (T == CastTy || !Opts.ShouldHandleIntegralCastForRanges)
+        return V;
+      // Simplify SymbolCast if no truncation occurred. This reduces unnecessary
+      // nested types between the root operand and the outermost type.
+      // E.g. (short)(int)(long)(char x) equivalent to (short)(char x).
+      if (isa<SymbolCast>(SE)) {
+        ASTContext &Ctx = getContext();
+        SymbolRef OperandSym = cast<SymbolCast>(SE)->getOperand();
+        QualType OperandTy = OperandSym->getType();
+        uint32_t OperandBitWidth = Ctx.getIntWidth(OperandTy);
+        uint32_t SymBitWidth = Ctx.getIntWidth(T);
+        uint32_t CastBitWidth = Ctx.getIntWidth(CastTy);
+        const bool isTruncated = (OperandBitWidth > SymBitWidth);
+        if (isTruncated) {
+          const bool isMoreTruncated = (SymBitWidth >= CastBitWidth);
+          if (isMoreTruncated) {
+            // Cast type bit width is not greater than the current:
+            // - (char)(short)(int x) -> (char)int x.
+            SE = OperandSym;
+            T = CastTy;
+          }
+        } else {
+          // Original and cast types are equal:
+          // - (ushort)(int)(ushort x) -> ushort x;
+          // - (char)(uint)(char x) -> char x.
+          if (OperandTy == CastTy)
+            return nonloc::SymbolVal(OperandSym);
+          // Current and cast type bit widths or signs are equal:
+          // - (signed)(unsigned)(short x) -> (signed)short x;
+          // - (unsigned)(signed)(short x) -> (unsigned)short x;
+          // - (unsigned long long)(unsigned int)(short x) ->
+          // (unsigned long long)short x;
+          // - (long long)(int)(short x) -> (long long)short x;
+          if ((SymBitWidth == CastBitWidth) ||
+              (T->isSignedIntegerOrEnumerationType() ==
+               CastTy->isSignedIntegerOrEnumerationType())) {
+            SE = OperandSym;
+            T = OperandTy;
+          }
+          // Current and cast type signs are different:
+          // - (signed long long)(unsigned int)(short x) ->
+          // (signed long long)(unsigned int)short x;
+          // - (unsigned long long)(signed int)(short x) ->
+          // (unsigned long long)(signed int)short x;
+          // Do nothing extra.
+          // This prevents wrong `sext` and `zext` actions.
+        }
+      }
+      return makeNonLoc(SE, T, CastTy);
+    }
     if (!Loc::isLocType(CastTy))
       if (!IsUnknownOriginalType || !CastTy->isFloatingType() ||
           T->isFloatingType())
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -694,6 +694,7 @@
 
 namespace {
 class EquivalenceClass;
+class NominalTypeList;
 } // end anonymous namespace
 
 REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
@@ -754,14 +755,16 @@
 
   LLVM_NODISCARD static inline ProgramStateRef
   markDisequal(BasicValueFactory &BV, RangeSet::Factory &F,
-               ProgramStateRef State, SymbolRef First, SymbolRef Second);
+               ProgramStateRef State, NominalTypeList &NTL, SymbolRef First,
+               SymbolRef Second);
   LLVM_NODISCARD static inline ProgramStateRef
   markDisequal(BasicValueFactory &BV, RangeSet::Factory &F,
-               ProgramStateRef State, EquivalenceClass First,
-               EquivalenceClass Second);
+               ProgramStateRef State, NominalTypeList &NTL,
+               EquivalenceClass First, EquivalenceClass Second);
   LLVM_NODISCARD inline ProgramStateRef
   markDisequal(BasicValueFactory &BV, RangeSet::Factory &F,
-               ProgramStateRef State, EquivalenceClass Other) const;
+               ProgramStateRef State, NominalTypeList &NTL,
+               EquivalenceClass Other) const;
   LLVM_NODISCARD static inline ClassSet
   getDisequalClasses(ProgramStateRef State, SymbolRef Sym);
   LLVM_NODISCARD inline ClassSet
@@ -821,8 +824,8 @@
   static inline bool
   addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
                        BasicValueFactory &BV, RangeSet::Factory &F,
-                       ProgramStateRef State, EquivalenceClass First,
-                       EquivalenceClass Second);
+                       ProgramStateRef State, NominalTypeList &NTL,
+                       EquivalenceClass First, EquivalenceClass Second);
 
   /// This is a unique identifier of the class.
   uintptr_t ID;
@@ -1009,6 +1012,35 @@
 //                           Symbolic reasoning logic
 //===----------------------------------------------------------------------===//
 
+class NominalTypeList {
+  CanQualType Types[4];
+
+public:
+  using Iterator = CanQualType *;
+
+  NominalTypeList(ASTContext &C)
+      : Types{C.Char8Ty, C.Char16Ty, C.Char32Ty, C.LongLongTy} {}
+  Iterator findByWidth(uint32_t Width) {
+    int index = 4;
+    switch (Width) {
+    case 8:
+      index = 0;
+      break;
+    case 16:
+      index = 1;
+      break;
+    case 32:
+      index = 2;
+      break;
+    case 64:
+      index = 3;
+    };
+    return Types + index;
+  }
+  Iterator begin() { return std::begin(Types); }
+  Iterator end() { return std::end(Types); }
+};
+
 /// A little component aggregating all of the reasoning we have about
 /// the ranges of symbolic expressions.
 ///
@@ -1019,11 +1051,60 @@
 public:
   template <class SourceType>
   static RangeSet inferRange(BasicValueFactory &BV, RangeSet::Factory &F,
-                             ProgramStateRef State, SourceType Origin) {
-    SymbolicRangeInferrer Inferrer(BV, F, State);
+                             ProgramStateRef State, NominalTypeList &NTL,
+                             SourceType Origin) {
+    SymbolicRangeInferrer Inferrer(BV, F, State, NTL);
     return Inferrer.infer(Origin);
   }
 
+  RangeSet VisitSymbolCast(const SymbolCast *Sym) {
+    // Unwrap symbol to get an underlying symbol.
+    // Store every next type except the inner(original) one.
+    SmallVector<QualType, 2> Types;
+    uint32_t MinBitWidth = UINT32_MAX;
+    SymbolRef RootSym = Sym;
+    ASTContext &C = ValueFactory.getContext();
+    do {
+      // We only handle integral cast, when all the types are integrals.
+      // Otherwise, pass the expression to VisitSymExpr.
+      QualType T = RootSym->getType();
+      if (!T->isIntegralOrEnumerationType())
+        return VisitSymExpr(Sym);
+
+      MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T));
+      Types.push_back(T);
+      RootSym = cast<SymbolCast>(RootSym)->getOperand();
+    } while (isa<SymbolCast>(RootSym));
+    // Now RootSym is the root symbol.
+
+    QualType RootTy = RootSym->getType();
+    const uint32_t RootBitWidth = C.getIntWidth(RootTy);
+
+    // Check if we had any truncated ranges of the root symbol,
+    // which are more precise for reasoning about other bigger truncations.
+    const RangeSet *RS = nullptr;
+    auto It = NominalTypes.findByWidth(MinBitWidth);
+    auto E = NominalTypes.findByWidth(RootBitWidth);
+    for (; !RS && It < E; ++It) {
+      SymbolRef S =
+          State->getSymbolManager().getCastSymbol(RootSym, RootTy, *It);
+      RS = getConstraint(State, S);
+    }
+    // If we didn't find any truncated ranges, look for the original range.
+    if (!RS)
+      RS = getConstraint(State, RootSym);
+
+    // If there's no existing range, create it based on type.
+    RangeSet OriginalRS = RS ? *RS : infer(RootTy);
+
+    // Cast original range to the types from inner to outer one by one.
+    auto TypesReversedRange = llvm::make_range(Types.rbegin(), Types.rend());
+    for (const QualType T : TypesReversedRange)
+      OriginalRS = RangeFactory.castTo(OriginalRS, T);
+
+    return OriginalRS;
+  }
+
   RangeSet VisitSymExpr(SymbolRef Sym) {
     // If we got to this function, the actual type of the symbolic
     // expression is not supported for advanced inference.
@@ -1046,8 +1127,8 @@
 
 private:
   SymbolicRangeInferrer(BasicValueFactory &BV, RangeSet::Factory &F,
-                        ProgramStateRef S)
-      : ValueFactory(BV), RangeFactory(F), State(S) {}
+                        ProgramStateRef S, NominalTypeList &NTL)
+      : ValueFactory(BV), RangeFactory(F), State(S), NominalTypes(NTL) {}
 
   /// Infer range information from the given integer constant.
   ///
@@ -1392,6 +1473,7 @@
   BasicValueFactory &ValueFactory;
   RangeSet::Factory &RangeFactory;
   ProgramStateRef State;
+  NominalTypeList &NominalTypes;
 };
 
 //===----------------------------------------------------------------------===//
@@ -1565,7 +1647,8 @@
 class RangeConstraintManager : public RangedConstraintManager {
 public:
   RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
-      : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
+      : RangedConstraintManager(EE, SVB), F(getBasicVals()),
+        NTL(SVB.getContext()) {}
 
   //===------------------------------------------------------------------===//
   // Implementation for interface from ConstraintManager.
@@ -1631,6 +1714,10 @@
 
 private:
   RangeSet::Factory F;
+  NominalTypeList NTL;
+
+  std::tuple<ProgramStateRef, SymbolRef, RangeSet>
+  handleSymbolCast(ProgramStateRef State, SymbolRef Sym, RangeSet R);
 
   RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
   RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
@@ -1698,7 +1785,8 @@
 
   ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
                                    SymbolRef RHS) {
-    return EquivalenceClass::markDisequal(getBasicVals(), F, State, LHS, RHS);
+    return EquivalenceClass::markDisequal(getBasicVals(), F, State, NTL, LHS,
+                                          RHS);
   }
 
   ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
@@ -1966,25 +2054,23 @@
   return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
 }
 
-inline ProgramStateRef EquivalenceClass::markDisequal(BasicValueFactory &VF,
-                                                      RangeSet::Factory &RF,
-                                                      ProgramStateRef State,
-                                                      SymbolRef First,
-                                                      SymbolRef Second) {
-  return markDisequal(VF, RF, State, find(State, First), find(State, Second));
+inline ProgramStateRef
+EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF,
+                               ProgramStateRef State, NominalTypeList &NTL,
+                               SymbolRef First, SymbolRef Second) {
+  return markDisequal(VF, RF, State, NTL, find(State, First),
+                      find(State, Second));
 }
 
-inline ProgramStateRef EquivalenceClass::markDisequal(BasicValueFactory &VF,
-                                                      RangeSet::Factory &RF,
-                                                      ProgramStateRef State,
-                                                      EquivalenceClass First,
-                                                      EquivalenceClass Second) {
-  return First.markDisequal(VF, RF, State, Second);
+inline ProgramStateRef EquivalenceClass::markDisequal(
+    BasicValueFactory &VF, RangeSet::Factory &RF, ProgramStateRef State,
+    NominalTypeList &NTL, EquivalenceClass First, EquivalenceClass Second) {
+  return First.markDisequal(VF, RF, State, NTL, Second);
 }
 
 inline ProgramStateRef
 EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF,
-                               ProgramStateRef State,
+                               ProgramStateRef State, NominalTypeList &NTL,
                                EquivalenceClass Other) const {
   // If we know that two classes are equal, we can only produce an infeasible
   // state.
@@ -1997,10 +2083,10 @@
 
   // Disequality is a symmetric relation, so if we mark A as disequal to B,
   // we should also mark B as disequalt to A.
-  if (!addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, *this,
-                            Other) ||
-      !addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, Other,
-                            *this))
+  if (!addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, NTL,
+                            *this, Other) ||
+      !addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, NTL,
+                            Other, *this))
     return nullptr;
 
   assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
@@ -2015,7 +2101,7 @@
 inline bool EquivalenceClass::addToDisequalityInfo(
     DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
     BasicValueFactory &VF, RangeSet::Factory &RF, ProgramStateRef State,
-    EquivalenceClass First, EquivalenceClass Second) {
+    NominalTypeList &NTL, EquivalenceClass First, EquivalenceClass Second) {
 
   // 1. Get all of the required factories.
   DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
@@ -2038,7 +2124,7 @@
     if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
 
       RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
-          VF, RF, State, First.getRepresentativeSymbol());
+          VF, RF, State, NTL, First.getRepresentativeSymbol());
 
       FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
 
@@ -2354,12 +2440,13 @@
 
 RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
                                           SymbolRef Sym) {
-  return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, Sym);
+  return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, NTL, Sym);
 }
 
 RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
                                           EquivalenceClass Class) {
-  return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, Class);
+  return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, NTL,
+                                           Class);
 }
 
 //===------------------------------------------------------------------------===
@@ -2374,6 +2461,67 @@
 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
 // UINT_MAX, 0, 1, and 2.
 
+std::tuple<ProgramStateRef, SymbolRef, RangeSet>
+RangeConstraintManager::handleSymbolCast(ProgramStateRef State, SymbolRef Sym,
+                                         RangeSet R) {
+  QualType T = Sym->getType();
+  if (!T->isIntegralOrEnumerationType() || R.isEmpty())
+    return {State, Sym, R};
+
+  BasicValueFactory &BVF = getBasicVals();
+  ASTContext &C = BVF.getContext();
+  SymbolRef RootSym = Sym;
+  if (isa<SymbolCast>(Sym)) {
+    uint32_t MinBitWidth = UINT32_MAX;
+    do {
+      // We only handle integral casts when all the types are integral.
+      T = RootSym->getType();
+      if (!T->isIntegralOrEnumerationType())
+        return {State, Sym, R};
+      MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T));
+      RootSym = cast<SymbolCast>(RootSym)->getOperand();
+    } while (isa<SymbolCast>(RootSym));
+
+    QualType RootTy = RootSym->getType();
+    uint32_t RootBitWidth = C.getIntWidth(RootSym->getType());
+    const bool isTruncated = (MinBitWidth < RootBitWidth);
+    if (isTruncated) {
+      // Truncation occurred. High bits are lost. We can't reason about ranges
+      // of the original (root) operand in this case. Just add a cast symbol
+      // to the constraint set.
+      // Create a new SymbolCast with a signed type of the smallest width met.
+      // E.g. (int)(uchar)x -> (char8)x
+      CanQualType Ty = *NTL.findByWidth(MinBitWidth);
+      R = F.castTo(R, Ty);
+      Sym = getSymbolManager().getCastSymbol(RootSym, RootTy, Ty);
+    } else {
+      // Promotion or conversion occurred. No bits are lost.
+      // Cast the range to the root type and add it to the constraint set.
+      R = F.castTo(R, RootTy);
+      Sym = RootSym;
+    }
+  }
+
+  T = Sym->getType();
+  for (auto It = NTL.findByWidth(C.getIntWidth(T)) - 1; It >= NTL.begin();
+       --It) {
+    SymbolRef S = State->getSymbolManager().getCastSymbol(
+        RootSym, RootSym->getType(), *It);
+    if (const RangeSet *RS = getConstraint(State, S)) {
+      RangeSet TruncR = F.castTo(R, *It);
+      TruncR = F.intersect(*RS, TruncR);
+      if (TruncR.isEmpty()) {
+        // This seems to be an infeasible branch. Return an empty set.
+        R = TruncR;
+        break;
+      }
+      State = setConstraint(State, S, TruncR);
+    }
+  }
+
+  return {State, Sym, R};
+}
+
 ProgramStateRef
 RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
                                     const llvm::APSInt &Int,
@@ -2388,6 +2536,7 @@
   RangeSet New = getRange(St, Sym);
   New = F.deletePoint(New, Point);
 
+  std::tie(St, Sym, New) = handleSymbolCast(St, Sym, New);
   return trackNE(New, St, Sym, Int, Adjustment);
 }
 
@@ -2405,6 +2554,7 @@
   RangeSet New = getRange(St, Sym);
   New = F.intersect(New, AdjInt);
 
+  std::tie(St, Sym, New) = handleSymbolCast(St, Sym, New);
   return trackEQ(New, St, Sym, Int, Adjustment);
 }
 
@@ -2442,6 +2592,8 @@
                                     const llvm::APSInt &Int,
                                     const llvm::APSInt &Adjustment) {
   RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
+
+  std::tie(St, Sym, New) = handleSymbolCast(St, Sym, New);
   return trackNE(New, St, Sym, Int, Adjustment);
 }
 
@@ -2479,6 +2631,8 @@
                                     const llvm::APSInt &Int,
                                     const llvm::APSInt &Adjustment) {
   RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
+
+  std::tie(St, Sym, New) = handleSymbolCast(St, Sym, New);
   return trackNE(New, St, Sym, Int, Adjustment);
 }
 
@@ -2516,7 +2670,13 @@
                                     const llvm::APSInt &Int,
                                     const llvm::APSInt &Adjustment) {
   RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
-  return New.isEmpty() ? nullptr : setConstraint(St, Sym, New);
+
+  if (New.isEmpty())
+    return nullptr;
+
+  std::tie(St, Sym, New) = handleSymbolCast(St, Sym, New);
+
+  return setConstraint(St, Sym, New);
 }
 
 RangeSet
@@ -2560,7 +2720,13 @@
                                     const llvm::APSInt &Int,
                                     const llvm::APSInt &Adjustment) {
   RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
-  return New.isEmpty() ? nullptr : setConstraint(St, Sym, New);
+
+  if (New.isEmpty())
+    return nullptr;
+
+  std::tie(St, Sym, New) = handleSymbolCast(St, Sym, New);
+
+  return setConstraint(St, Sym, New);
 }
 
 ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
Index: clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -431,7 +431,7 @@
       case CK_IntegralCast: {
         // Delegate to SValBuilder to process.
         SVal V = state->getSVal(Ex, LCtx);
-        V = svalBuilder.evalIntegralCast(state, V, T, ExTy);
+        V = svalBuilder.evalCast(V, T, ExTy);
         state = state->BindExpr(CastE, LCtx, V);
         Bldr.generateNode(CastE, Pred, state);
         continue;
Index: clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
@@ -418,6 +418,13 @@
 
   ProgramStateRef State = C.getState();
 
+  // Unwrap the symbolic expression to skip casts applied to the argument
+  // at the function call. This is useful in C, where functions cannot be
+  // overloaded, yet we need to pass arguments of different types, so
+  // implicit casts occur.
+  while (isa<SymbolCast>(Sym))
+    Sym = cast<SymbolCast>(Sym)->getOperand();
+
   C.addTransition(C.getState()->set<DenotedSymbols>(Sym, E));
 }
 
Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
+++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
@@ -104,24 +104,8 @@
 
   virtual ~SValBuilder() = default;
 
-  bool haveSameType(const SymExpr *Sym1, const SymExpr *Sym2) {
-    return haveSameType(Sym1->getType(), Sym2->getType());
-  }
-
-  bool haveSameType(QualType Ty1, QualType Ty2) {
-    // FIXME: Remove the second disjunct when we support symbolic
-    // truncation/extension.
-    return (Context.getCanonicalType(Ty1) == Context.getCanonicalType(Ty2) ||
-            (Ty1->isIntegralOrEnumerationType() &&
-             Ty2->isIntegralOrEnumerationType()));
-  }
-
   SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy);
 
-  // Handles casts of type CK_IntegralCast.
-  SVal evalIntegralCast(ProgramStateRef state, SVal val, QualType castTy,
-                        QualType originalType);
-
   virtual SVal evalMinus(NonLoc val) = 0;
 
   virtual SVal evalComplement(NonLoc val) = 0;
Index: clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -314,6 +314,12 @@
                 "Display the checker name for textual outputs",
                 true)
 
+ANALYZER_OPTION(bool, ShouldHandleIntegralCastForRanges,
+                "handle-integral-cast-for-ranges",
+                "Handle truncations, promotions and conversions for ranges of "
+                "integral symbols.",
+                false)
+
 //===----------------------------------------------------------------------===//
 // Unsigned analyzer options.
 //===----------------------------------------------------------------------===//
Index: clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
+++ clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h
@@ -135,8 +135,9 @@
            " (" + Visit(S->getRHS()) + ")";
   }
 
-  // TODO: SymbolCast doesn't appear in practice.
-  // Add the relevant code once it does.
+  std::string VisitSymbolCast(const SymbolCast *S) {
+    return "(" + S->getType().getAsString() + ")" + Visit(S->getOperand());
+  }
 
   std::string VisitSymbolicRegion(const SymbolicRegion *R) {
     // Explain 'this' object here.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to