[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2019-02-20 Thread Balogh , Ádám via Phabricator via cfe-commits
baloghadamsoftware updated this revision to Diff 187611.
baloghadamsoftware added a comment.

Instead of recording comparisons do an eager state split if the result is a 
symbolic value.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D53701/new/

https://reviews.llvm.org/D53701

Files:
  lib/StaticAnalyzer/Checkers/IteratorChecker.cpp

Index: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
===
--- lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
+++ lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
@@ -133,8 +133,6 @@
   }
 };
 
-typedef llvm::PointerUnion RegionOrSymbol;
-
 // Structure to record the symbolic begin and end position of a container
 struct ContainerData {
 private:
@@ -172,41 +170,21 @@
   }
 };
 
-// Structure fo recording iterator comparisons. We needed to retrieve the
-// original comparison expression in assumptions.
-struct IteratorComparison {
-private:
-  RegionOrSymbol Left, Right;
-  bool Equality;
-
-public:
-  IteratorComparison(RegionOrSymbol L, RegionOrSymbol R, bool Eq)
-  : Left(L), Right(R), Equality(Eq) {}
-
-  RegionOrSymbol getLeft() const { return Left; }
-  RegionOrSymbol getRight() const { return Right; }
-  bool isEquality() const { return Equality; }
-  bool operator==(const IteratorComparison ) const {
-return Left == X.Left && Right == X.Right && Equality == X.Equality;
-  }
-  bool operator!=(const IteratorComparison ) const {
-return Left != X.Left || Right != X.Right || Equality != X.Equality;
-  }
-  void Profile(llvm::FoldingSetNodeID ) const { ID.AddInteger(Equality); }
-};
-
 class IteratorChecker
 : public Checker, check::Bind,
- check::LiveSymbols, check::DeadSymbols,
- eval::Assume> {
+ check::LiveSymbols, check::DeadSymbols> {
 
   std::unique_ptr OutOfRangeBugType;
   std::unique_ptr MismatchedBugType;
   std::unique_ptr InvalidatedBugType;
 
-  void handleComparison(CheckerContext , const SVal , const SVal ,
-const SVal , OverloadedOperatorKind Op) const;
+  void handleComparison(CheckerContext , const Expr *CE, const SVal ,
+const SVal , const SVal ,
+OverloadedOperatorKind Op) const;
+  void processComparison(CheckerContext , ProgramStateRef State,
+ SymbolRef Sym1, SymbolRef Sym2, const SVal ,
+ OverloadedOperatorKind Op) const;
   void verifyAccess(CheckerContext , const SVal ) const;
   void verifyDereference(CheckerContext , const SVal ) const;
   void handleIncrement(CheckerContext , const SVal , const SVal ,
@@ -281,8 +259,6 @@
  CheckerContext ) const;
   void checkLiveSymbols(ProgramStateRef State, SymbolReaper ) const;
   void checkDeadSymbols(SymbolReaper , CheckerContext ) const;
-  ProgramStateRef evalAssume(ProgramStateRef State, SVal Cond,
- bool Assumption) const;
 };
 } // namespace
 
@@ -292,9 +268,6 @@
 
 REGISTER_MAP_WITH_PROGRAMSTATE(ContainerMap, const MemRegion *, ContainerData)
 
-REGISTER_MAP_WITH_PROGRAMSTATE(IteratorComparisonMap, const SymExpr *,
-   IteratorComparison)
-
 namespace {
 
 bool isIteratorType(const QualType );
@@ -324,16 +297,6 @@
 bool hasSubscriptOperator(ProgramStateRef State, const MemRegion *Reg);
 bool frontModifiable(ProgramStateRef State, const MemRegion *Reg);
 bool backModifiable(ProgramStateRef State, const MemRegion *Reg);
-BinaryOperator::Opcode getOpcode(const SymExpr *SE);
-const RegionOrSymbol getRegionOrSymbol(const SVal );
-const ProgramStateRef processComparison(ProgramStateRef State,
-RegionOrSymbol LVal,
-RegionOrSymbol RVal, bool Equal);
-const ProgramStateRef saveComparison(ProgramStateRef State,
- const SymExpr *Condition, const SVal ,
- const SVal , bool Eq);
-const IteratorComparison *loadComparison(ProgramStateRef State,
- const SymExpr *Condition);
 SymbolRef getContainerBegin(ProgramStateRef State, const MemRegion *Cont);
 SymbolRef getContainerEnd(ProgramStateRef State, const MemRegion *Cont);
 ProgramStateRef createContainerBegin(ProgramStateRef State,
@@ -343,21 +306,9 @@
const SymbolRef Sym);
 const IteratorPosition *getIteratorPosition(ProgramStateRef State,
 const SVal );
-const IteratorPosition *getIteratorPosition(ProgramStateRef State,
-RegionOrSymbol RegOrSym);
 ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal ,
 const IteratorPosition );
-ProgramStateRef setIteratorPosition(ProgramStateRef State,
-RegionOrSymbol RegOrSym,
-   

[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2019-02-20 Thread Balogh , Ádám via Phabricator via cfe-commits
baloghadamsoftware added a comment.
Herald added a subscriber: jdoerfert.

In D53701#1322255 , @NoQ wrote:

> Well, i mean, whenever you are inlining a method,  you //are// exposing 
> details of the "internals" of the inlined API to the user. The only 
> difference is whether this API itself deals with iterators. This sounds to me 
> as if we try not to inline iterator methods in general. Or try really hard to 
> prevent desynchronization between the two models.


When tracking multi-level iterator structures (iterator adapters or other 
constructs e.g. a leaf-iterator in a general tree) we mainly want to track the 
topmost level, but it is inaccurate without inlining their methods. We need the 
to track the real control flow. However, when reporting a bug we want to report 
it at the topmost level since the erroneous code is most probably the top-level 
code, not the API. That way we do not directly expose the layers below the API 
to the user. That is the reason that we have to handle all structures which 
behave as iterators as iterators, even if they contain iterators themselves. 
Since error-checking happens in the `checkPre...()` hooks, bugs are 
automatically catched at the topmost level.

> Ok, how about an eager state split for now?

Do you mean that upon iterator comparison I do not record the comparison and 
later retrieve it in the branches with concrete results but I do the 
state-split myself in `checkPostCall()`?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D53701/new/

https://reviews.llvm.org/D53701



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2018-12-06 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

In D53701#1318566 , 
@baloghadamsoftware wrote:

> In D53701#1315242 , @NoQ wrote:
>
> >
>
>
> When using iterator adapters of the STL the bugs may also be filtered out by 
> the analyzer if this option is enabled.


Mmm, are you talking about `c++-container-inlining`? This option currently 
prevents inlining of container methods. STL iterators shouldn't be affected, 
and even if they were affected, they will simply fall back to conservative 
evaluation.

On the other hand, this is how i *want* this option to work. I.e., instead of 
suppressing inlining, it should suppress bugs via visitors when an 
//interesting// event happen within a container, where "interesting" is 
potentially checker-specific and defined in every visitor separately. I believe 
that most checkers will be unaffected. Additionally, it still doesn't affect 
iterators.

> I think that only tracking the inner iterator and treating the outer iterator 
> as a non-iterator is a nightmare from the user's perspective: all detections 
> seem to be "internal" errors of the underlying API and thus regarded as 
> "probably false positives". (...) The user must see the errors on the topmost 
> level whenever possible to understand them.

Well, i mean, whenever you are inlining a method,  you //are// exposing details 
of the "internals" of the inlined API to the user. The only difference is 
whether this API itself deals with iterators. This sounds to me as if we try 
not to inline iterator methods in general. Or try really hard to prevent 
desynchronization between the two models.

Ok, how about an eager state split for now?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D53701/new/

https://reviews.llvm.org/D53701



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2018-12-04 Thread Balogh , Ádám via Phabricator via cfe-commits
baloghadamsoftware added a comment.

In D53701#1315242 , @NoQ wrote:

> I think it's worth a try to do a total evalCall at first, and then disable 
> evalCall (together with the attempt to model the iterator position) in an 
> incrementally growing blacklist of cases (1. iterator adaptors, 2. ) as 
> we encounter problems. This essentially becomes part of the logic that 
> decides whether an object is an iterator. Eg., if it's more like an adaptor 
> than an actual iterator, let's treat it as if it's not an iterator, but 
> inline instead, and hope that we model the underlying iterators correctly via 
> evalCall.


I think that only tracking the inner iterator and treating the outer iterator 
as a non-iterator is a nightmare from the user's perspective: all detections 
seem to be "internal" errors of the underlying API and thus regarded as 
"probably false positives". When using iterator adapters of the STL the bugs 
may also be filtered out by the analyzer if this option is enabled. The user 
must see the errors on the topmost level whenever possible to understand them.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D53701/new/

https://reviews.llvm.org/D53701



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2018-11-30 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.
Herald added a subscriber: gamesh411.

In D53701#1288682 , 
@baloghadamsoftware wrote:

> I am almost sure it is implossible. It is not only about iterator-adapters in 
> the classical sense but also any iterator that internally deals with other 
> iterators.
>
> For example I created an iterator in the past that iterates over the elements 
> of a list of lists. This iterator contained two iterators, one for the outer 
> and one for the inner list. In this case determining whether two iterators 
> are the same is non-trivial: for all in-range iterators both the outer and 
> the inner iterators must be the same. However if the outer iterator is the 
> past-end iterator of the outer list then the inner iterator is irrelevant. 
> Thus the comparison operator of such iterator must check first whether any of 
> the outer iterators is the past-end iterator and only compare the inner 
> iterator none of them is. If both outer iterators are the past-end iterator 
> then they are equal regardless of the inner iterators.
>
> Another example could be an iterator that somehow merges two lists internally 
> using two different iterators. In this case only one of the inner iterators 
> is relevant when comparing two merging iterators.
>
> So by dropping the inlining we lose some intrinsic knowledge of the analyzed 
> code which leads the checker to wrong assumptions.


The nested iterator thing looks easy to detect heuristically. Just have a look 
if any of the fields within the object are of iterator type.

I think it's worth a try to do a total evalCall at first, and then disable 
evalCall (together with the attempt to model the iterator position) in an 
incrementally growing blacklist of cases (1. iterator adaptors, 2. ) as we 
encounter problems. This essentially becomes part of the logic that decides 
whether an object is an iterator. Eg., if it's more like an adaptor than an 
actual iterator, let's treat it as if it's not an iterator, but inline instead, 
and hope that we model the underlying iterators correctly via evalCall.

This does look hacky, but it does look less hacky than trying to align two 
models of the iterator position. Or is it actually necessary to maintain the 
two models of the iterator position in order to avoid these false positives? If 
so, could you give an example?

> When I first started working on Clang Static Analyzer Anna told me the 
> `-analyzer-eagerly-assume` should be the default.

Which is why i suggest a similar behavior here.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D53701/new/

https://reviews.llvm.org/D53701



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2018-11-06 Thread Balogh , Ádám via Phabricator via cfe-commits
baloghadamsoftware added a comment.

In https://reviews.llvm.org/D53701#1288258, @NoQ wrote:

> Mmm, is it possible to detect adapters and inline them as an exception from 
> the rule? You can foresee a pretty complicated system of rules and exceptions 
> if we go down this path, but i believe that it is still much easier and more 
> reliable than the system that tries to synchronize two different models of 
> iterators, so i really encourage you to at least give it a try somehow.


I am almost sure it is implossible. It is not only about iterator-adapters in 
the classical sense but also any iterator that internally deals with other 
iterators.

For example I created an iterator in the past that iterates over the elements 
of a list of lists. This iterator contained two iterators, one for the outer 
and one for the inner list. In this case determining whether two iterators are 
the same is non-trivial: for all in-range iterators both the outer and the 
inner iterators must be the same. However if the outer iterator is the past-end 
iterator of the outer list then the inner iterator is irrelevant. Thus the 
comparison operator of such iterator must check first whether any of the outer 
iterators is the past-end iterator and only compare the inner iterator none of 
them is. If both outer iterators are the past-end iterator then they are equal 
regardless of the inner iterators.

Another example could be an iterator that somehow merges two lists internally 
using two different iterators. In this case only one of the inner iterators is 
relevant when comparing two merging iterators.

So by dropping the inlining we lose some intrinsic knowledge of the analyzed 
code which leads the checker to wrong assumptions.

>> I  wonder whether the current "mixed" approach introduces additional paths 
>> because we do not do explicit state-splits and function 
>> `processComparison()` removes contradicting branches.
> 
> I believe that the effect of eager state splits is going to be roughly 
> similar to the actual -analyzer-eagerly-assume:
> 
> 1. Split paths earlier than it is absolutely necessary, which may slow down 
> the analysis and duplicate some work, but most of the time it'll be just a 
> few nodes before the actual check in the code would have caused a split 
> anyway.
> 2. Simplify constraint solving on each of the new paths, which will indeed 
> help with refuting some invalid paths, especially those in which new 
> constraints over the symbols are introduced after the check, but that's due 
> to pecularities of our constraint solver, i.e. rather accidentally than 
> intentionally.

When I first started working on Clang Static Analyzer Anna told be the 
`-analyzer-eagerly-assume` should be the default. In the iterator checkers the 
refutation happens intentionally in the functions `relateSymbols()` and 
`processComparison()`.


Repository:
  rC Clang

https://reviews.llvm.org/D53701



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2018-11-05 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

In https://reviews.llvm.org/D53701#1287007, @baloghadamsoftware wrote:

> ...on iterator-adapters inlining ensures that we handle the comparison of the 
> underlying iterator correctly. Without inlining, `evalCall()` only works on 
> the outermost iterator which is not always correct: it may happen that the 
> checker cannot conclude any useful relation between the two iterator-adapters 
> but there are some relations stored about the inner iterators. Based on my 
> experiments quite many of the false-positives are related to 
> iterator-adapters.  So I am afraid that we introduce even more 
> false-positives by losing inlining.


Mmm, is it possible to detect adapters and inline them as an exception from the 
rule? You can foresee a pretty complicated system of rules and exceptions if we 
go down this path, but i believe that it is still much easier and more reliable 
than the system that tries to synchronize two different models of iterators, so 
i really encourage you to at least give it a try somehow.

> I  wonder whether the current "mixed" approach introduces additional paths 
> because we do not do explicit state-splits and function `processComparison()` 
> removes contradicting branches.

I believe that the effect of eager state splits is going to be roughly similar 
to the actual -analyzer-eagerly-assume:

1. Split paths earlier than it is absolutely necessary, which may slow down the 
analysis and duplicate some work, but most of the time it'll be just a few 
nodes before the actual check in the code would have caused a split anyway.
2. Simplify constraint solving on each of the new paths, which will indeed help 
with refuting some invalid paths, especially those in which new constraints 
over the symbols are introduced after the check, but that's due to pecularities 
of our constraint solver, i.e. rather accidentally than intentionally.




Comment at: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp:1197-1199
   while (const auto *CBOR = Cont->getAs()) {
 Cont = CBOR->getSuperRegion();
   }

Szelethus wrote:
> You will have to excuse me for commenting on something totally unrelated, but 
> I'm afraid this may cause a crash, if the region returned by `getSuperRegion` 
> is symbolic. I encountered this error when doing the exact same thing in my 
> checker: D50892. Can something like this occur with this checker?
Hmm, had a look at the crash you mention here. Your code crashed because you 
additionally did `getAs`, which would turn your pointer into 
a null when a symbolic region is encountered. So the final code ended up a bit 
weird: there's no need to check that it's a `TypedValueRegion` before you check 
that it's a `CXXBaseObjectRegion`; just check for the latter directly. This 
code looks correct in this sense.

Also, since this code keeps copied around, do we need a better helper function 
for this unwrap idiom? I.e., something like `MemRegion::StripCasts()` that only 
unwraps derived-to-base casts?



Comment at: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp:2066
"Symbol comparison must be a comparison");
 return assumeNoOverflow(NewState, cast(CompSym)->getLHS(), 2);
   }

baloghadamsoftware wrote:
> NoQ wrote:
> > P.S. What was the idea here? Like, `CompSym` was just computed via `BO_EQ` 
> > and has type of a condition, i.e. `bool` because we are in C++. Is this 
> > code trying to say that the result of the comparison is bounded by `true/2`?
> There is also a `->getLHS()` which means that we enforce the bound on the 
> left-hand side of the rearranged comparison. Although both symbols are 
> bounded by `max/4`, constraint manager does not imply that the sum/diff is 
> the bounded by `max/2`. I have to enforce this manually to prevent `min` 
> negated to `min` when the constraint manager negates the difference.
Ouch, right, didn't notice `getLHS()`, sorry!


Repository:
  rC Clang

https://reviews.llvm.org/D53701



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2018-11-05 Thread Balogh , Ádám via Phabricator via cfe-commits
baloghadamsoftware added a comment.

Your suggestion sounds completely reasonable. We cannot rely entirely on 
inlining of comparison operators. However, there is an important side-effect of 
inlining: on iterator-adapters inlining ensures that we handle the comparison 
of the underlying iterator correctly. Without inlining, `evalCall()` only works 
on the outermost iterator which is not always correct: it may happen that the 
checker cannot conclude any useful relation between the two iterator-adapters 
but there are some relations stored about the inner iterators. Based on my 
experiments quite many of the false-positives are related to iterator-adapters. 
 So I am afraid that we introduce even more false-positives by losing inlining.

I  wonder whether the current "mixed" approach introduces additional paths 
because we do not do explicit state-splits and function `processComparison()` 
removes contradicting branches.




Comment at: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp:2066
"Symbol comparison must be a comparison");
 return assumeNoOverflow(NewState, cast(CompSym)->getLHS(), 2);
   }

NoQ wrote:
> P.S. What was the idea here? Like, `CompSym` was just computed via `BO_EQ` 
> and has type of a condition, i.e. `bool` because we are in C++. Is this code 
> trying to say that the result of the comparison is bounded by `true/2`?
There is also a `->getLHS()` which means that we enforce the bound on the 
left-hand side of the rearranged comparison. Although both symbols are bounded 
by `max/4`, constraint manager does not imply that the sum/diff is the bounded 
by `max/2`. I have to enforce this manually to prevent `min` negated to `min` 
when the constraint manager negates the difference.


Repository:
  rC Clang

https://reviews.llvm.org/D53701



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2018-11-02 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

Ok, so what this code does is, eg., for a call of `i1.operator==(i2)` that 
returns a symbol `$x` of type `bool`, it conserves the sacred checker-specific 
knowledge that `$x` is in fact equal to `$offset(i1) == $offset(i2)`, where 
`$offset(i)` is the offset symbol within `IteratorPosition` to which iterator 
`i` is mapped.

This looks like a renaming problem to me: what we really want to do is 
//rename// (i.e. update representation in the `SVal` hierarchy of) `$x` to 
`$offset(i1) == $offset(i2)`. And for now the single plausible approach to 
renaming problems in the Static Analyzer is to avoid them: //give the value a 
correct name from the start// so that you didn't have to rename it later.

What this means is that instead of waiting until `checkPostCall` to obtain `$x` 
and then trying to rename it into `$offset(i1) == $offset(i2)`, we should 
`evalCall` the comparison operator to return `$offset(i1) == $offset(i2)`. So 
that the symbol with the wrong name (i.e., `$x`) didn't appear in the program 
state in the first place.

The good thing about this approach is that it does not require any extra 
tracking at all - no comparison maps, no `evalAssume()`, nothing. The value is 
simply correct from the start.

As a side effect, you will have a chance (though still not forced) to avoid 
redundant invalidation of Store during evaluation of the operator call. This is 
the correct behavior for at least STL containers and probably for all 
containers ever created by mankind. Though of course you never know. I.e., what 
if the code under analysis measures complexity of vector sort procedure and 
increments a global counter every time two iterators are compared within that 
procedure? But at least for STL/boost/LLVM/WebKit containers this is probably 
the right thing to do.

***

Now, of course, `evalCall()` would suppress inlining. During `evalCall()` we 
currently do not know whether the function will be inlined or evaluated 
conservatively if none of the checkers evaluate it, but we can easily provide 
such information in `evalCall()`, so this is not a problem.

The problem with inlining is that we got names for iterator offsets wrong from 
the start, because we conjured them out of thin air and they are conflicting 
with the actual representation of offsets within the implementation of the 
container. Which brings us back to a renaming problem: the problem of renaming 
`$offset(i1) == $offset(i2)` into the actual return value `$x` that was 
computed via inlining. Moreover, this new renaming problem is ill-formed 
because renaming non-atomic symbols doesn't make any sense - we should instead 
rename `$offset(i1)` and `$offset(i2)` separately. Still, the problem is 
indeed, as you already noticed, solved easily when `$x` is concrete: we only 
need to assume that `$offset(i1) == $offset(i2)` or `$offset(i1) != 
$offset(i2)` depending on the concrete value of `$x`.

And if `$x` is symbolic, we could still //introduce a state split// here: on 
one branch both `$x` and `$offset(i1) == $offset(i2)` are false, on the other 
branch they both are true, and no additional tracking is ever be necessary. I 
believe that such state split is not invalid: it pretty much corresponds to the 
"eagerly assume" behavior, just for iterators. The question here is how much 
are we sure that both branches are possible. Even if neither inlining nor our 
custom iterator model managed to refute any of these two branches, one of the 
paths may still be infeasible. But the amount of error here is not larger than 
eagerly-assume, and for eagerly-assume it isn't that bad, so we could try. Of 
course, the alternative to state split is assuming things about `($offset(i1) 
== $offset(i2)) == $x`, but our constraint solver will not be able to handle 
such constraints, which is the very reason why we have problems with renaming 
(well, at least some of them; renaming temporary regions in C++ is slightly 
more complicated than that (understatement intended)). In fact, i think the 
reasoning behind having eager assumptions for numbers is exactly that: they 
wanted to make constraints simpler.

***

But still, i want to step back and ask the question that i really really want 
answered here: //if container methods are inlinable, do we actually want to 
track iterator positions manually?// Maybe just rely on inlining entirely when 
possible? Like, both for modeling and for bug-finding. Or only rely on 
`evalCall()`? Essentially, if inlining is not reliable enough for relying on it 
entirely (so we have to maintain a parallel checker-specific model of iterators 
and have these two models exchange information), why do we think that it is 
reliable enough for the purpose of evaluating iterator comparisons?

This question is in fact asked and answered, one way or another, intentionally 
or accidentally, with different levels of urgency, in every checker that tries 
to model effects of function calls. The most famous example of 

[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2018-10-29 Thread Umann Kristóf via Phabricator via cfe-commits
Szelethus added a comment.

Makes sense!




Comment at: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp:39
 //currently treated as an lvalue.
 // * type-IIc, type-IIIc: compound values of iterator-typed objects, when the
 //iterator object is treated as an rvalue taken of a

Did you mean lazy compound values?



Comment at: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp:174
 
-// Structure fo recording iterator comparisons. We needed to retrieve the
-// original comparison expression in assumptions.
-struct IteratorComparison {
+// Structure fo recording symbol comparisons. We need to retrieve the original
+// comparison expression in assumptions.

typo: for



Comment at: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp:332-333
 BinaryOperator::Opcode getOpcode(const SymExpr *SE);
-const RegionOrSymbol getRegionOrSymbol(const SVal );
-const ProgramStateRef processComparison(ProgramStateRef State,
-RegionOrSymbol LVal,
-RegionOrSymbol RVal, bool Equal);
-const ProgramStateRef saveComparison(ProgramStateRef State,
- const SymExpr *Condition, const SVal 
,
- const SVal , bool Eq);
-const IteratorComparison *loadComparison(ProgramStateRef State,
- const SymExpr *Condition);
+const ProgramStateRef saveComparison(ProgramStateRef State, SymbolRef 
Condition,
+ SymbolRef LSym, SymbolRef RSym, bool Eq);
+const SymbolComparison *loadComparison(ProgramStateRef State,

I think this name would be better if you added the new state to 
`CheckerContext` within this function (and making it `void`), or rename it to 
`getEvaluatedComparisonState`.



Comment at: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp:373
 SymbolRef NewSym, SymbolRef CondSym, BinaryOperator::Opcode Opc);
+ProgramStateRef relateSymbols(ProgramStateRef State, SymbolRef Sym1,
+  SymbolRef Sym2, bool Equal);

I think `evaluateComparison` would be a more fitting name.



Comment at: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp:1197-1199
   while (const auto *CBOR = Cont->getAs()) {
 Cont = CBOR->getSuperRegion();
   }

You will have to excuse me for commenting on something totally unrelated, but 
I'm afraid this may cause a crash, if the region returned by `getSuperRegion` 
is symbolic. I encountered this error when doing the exact same thing in my 
checker: D50892. Can something like this occur with this checker?



Comment at: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp:2057-2058
   auto NewState = State->assume(comparison.castAs(), Equal);
+  if (!NewState)
+return nullptr;
+

It isn't obvious to me (at first) what happens here -- maybe document when this 
function will return with `nullptr`? When `relateSymbol` is called and checked 
whether the returned value is null or not, one could think that this symbolizes 
some sort of failure.


Repository:
  rC Clang

https://reviews.llvm.org/D53701



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53701: [Analyzer] Record and process comparison of symbols instead of iterator positions in interator checkers

2018-10-25 Thread Balogh , Ádám via Phabricator via cfe-commits
baloghadamsoftware created this revision.
baloghadamsoftware added a reviewer: NoQ.
baloghadamsoftware added a project: clang.
Herald added subscribers: donat.nagy, Szelethus, mikhail.ramalho, a.sidorin, 
rnkovacs, szepet, whisperity.
Herald added a reviewer: george.karpenkov.

Currently iterator checkers record comparison of iterator positions and process 
them for keeping track the distance between them (e.g. whether a position is 
the same as the end position). However this makes some processing unnecessarily 
complex and it is not needed at all: we only need to keep track between the 
abstract symbols stored in these iterator positions. This patch changes this 
and opens the path to comparisons to the `begin()` and `end()` symbols between 
the container (e.g. size, emptiness) which are stored as symbols, not iterator 
positions. The functionality of the checker is unchanged.


Repository:
  rC Clang

https://reviews.llvm.org/D53701

Files:
  lib/StaticAnalyzer/Checkers/IteratorChecker.cpp

Index: lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
===
--- lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
+++ lib/StaticAnalyzer/Checkers/IteratorChecker.cpp
@@ -134,8 +134,6 @@
   }
 };
 
-typedef llvm::PointerUnion RegionOrSymbol;
-
 // Structure to record the symbolic begin and end position of a container
 struct ContainerData {
 private:
@@ -173,27 +171,31 @@
   }
 };
 
-// Structure fo recording iterator comparisons. We needed to retrieve the
-// original comparison expression in assumptions.
-struct IteratorComparison {
+// Structure fo recording symbol comparisons. We need to retrieve the original
+// comparison expression in assumptions.
+struct SymbolComparison {
 private:
-  RegionOrSymbol Left, Right;
+  SymbolRef Left, Right;
   bool Equality;
 
 public:
-  IteratorComparison(RegionOrSymbol L, RegionOrSymbol R, bool Eq)
+  SymbolComparison(SymbolRef L, SymbolRef R, bool Eq)
   : Left(L), Right(R), Equality(Eq) {}
 
-  RegionOrSymbol getLeft() const { return Left; }
-  RegionOrSymbol getRight() const { return Right; }
+  SymbolRef getLeft() const { return Left; }
+  SymbolRef getRight() const { return Right; }
   bool isEquality() const { return Equality; }
-  bool operator==(const IteratorComparison ) const {
+  bool operator==(const SymbolComparison ) const {
 return Left == X.Left && Right == X.Right && Equality == X.Equality;
   }
-  bool operator!=(const IteratorComparison ) const {
+  bool operator!=(const SymbolComparison ) const {
 return Left != X.Left || Right != X.Right || Equality != X.Equality;
   }
-  void Profile(llvm::FoldingSetNodeID ) const { ID.AddInteger(Equality); }
+  void Profile(llvm::FoldingSetNodeID ) const {
+ID.Add(Left);
+ID.Add(Right);
+ID.AddInteger(Equality);
+  }
 };
 
 class IteratorChecker
@@ -206,8 +208,12 @@
   std::unique_ptr MismatchedBugType;
   std::unique_ptr InvalidatedBugType;
 
-  void handleComparison(CheckerContext , const SVal , const SVal ,
-const SVal , OverloadedOperatorKind Op) const;
+  void handleComparison(CheckerContext , const Expr *CE, const SVal ,
+const SVal , const SVal ,
+OverloadedOperatorKind Op) const;
+  void processComparison(CheckerContext , ProgramStateRef State,
+ SymbolRef Sym1, SymbolRef Sym2, const SVal ,
+ OverloadedOperatorKind Op) const;
   void verifyAccess(CheckerContext , const SVal ) const;
   void verifyDereference(CheckerContext , const SVal ) const;
   void handleIncrement(CheckerContext , const SVal , const SVal ,
@@ -290,8 +296,8 @@
 
 REGISTER_MAP_WITH_PROGRAMSTATE(ContainerMap, const MemRegion *, ContainerData)
 
-REGISTER_MAP_WITH_PROGRAMSTATE(IteratorComparisonMap, const SymExpr *,
-   IteratorComparison)
+REGISTER_MAP_WITH_PROGRAMSTATE(SymbolComparisonMap, const SymExpr *,
+   SymbolComparison)
 
 namespace {
 
@@ -323,15 +329,10 @@
 bool frontModifiable(ProgramStateRef State, const MemRegion *Reg);
 bool backModifiable(ProgramStateRef State, const MemRegion *Reg);
 BinaryOperator::Opcode getOpcode(const SymExpr *SE);
-const RegionOrSymbol getRegionOrSymbol(const SVal );
-const ProgramStateRef processComparison(ProgramStateRef State,
-RegionOrSymbol LVal,
-RegionOrSymbol RVal, bool Equal);
-const ProgramStateRef saveComparison(ProgramStateRef State,
- const SymExpr *Condition, const SVal ,
- const SVal , bool Eq);
-const IteratorComparison *loadComparison(ProgramStateRef State,
- const SymExpr *Condition);
+const ProgramStateRef saveComparison(ProgramStateRef State, SymbolRef Condition,
+ SymbolRef LSym, SymbolRef RSym, bool Eq);