[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-11-23 Thread Borsik Gábor via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
boga95 marked 2 inline comments as done.
Closed by commit rG89bc4c662c6c: [analyzer] Add custom filter functions for 
GenericTaintChecker (authored by boga95).

Changed prior to commit:
  https://reviews.llvm.org/D59516?vs=220385=230771#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516

Files:
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.h
  clang/test/Analysis/Inputs/taint-generic-config.yaml
  clang/test/Analysis/taint-generic.c

Index: clang/test/Analysis/taint-generic.c
===
--- clang/test/Analysis/taint-generic.c
+++ clang/test/Analysis/taint-generic.c
@@ -56,6 +56,8 @@
 extern FILE *stdin;
 #endif
 
+#define bool _Bool
+
 int fscanf(FILE *restrict stream, const char *restrict format, ...);
 int sprintf(char *str, const char *format, ...);
 void setproctitle(const char *fmt, ...);
@@ -346,6 +348,7 @@
 void myScanf(const char*, ...);
 int myPropagator(int, int*);
 int mySnprintf(char*, size_t, const char*, ...);
+bool isOutOfRange(const int*);
 void mySink(int, int, int);
 
 void testConfigurationSources1() {
@@ -372,6 +375,13 @@
   Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
 }
 
+void testConfigurationFilter() {
+  int x = mySource1();
+  if (isOutOfRange()) // the filter function
+return;
+  Buffer[x] = 1; // no-warning
+}
+
 void testConfigurationSinks() {
   int x = mySource1();
   mySink(x, 1, 2);
Index: clang/test/Analysis/Inputs/taint-generic-config.yaml
===
--- clang/test/Analysis/Inputs/taint-generic-config.yaml
+++ clang/test/Analysis/Inputs/taint-generic-config.yaml
@@ -36,8 +36,8 @@
 # A list of filter functions
 Filters:
   # int x; // x is tainted
-  # myFilter(); // x is not tainted anymore
-  - Name: myFilter
+  # isOutOfRange(); // x is not tainted anymore
+  - Name: isOutOfRange
 Args: [0]
 
 # A list of sink functions
Index: clang/lib/StaticAnalyzer/Checkers/Taint.h
===
--- clang/lib/StaticAnalyzer/Checkers/Taint.h
+++ clang/lib/StaticAnalyzer/Checkers/Taint.h
@@ -27,34 +27,39 @@
 static constexpr TaintTagType TaintTagGeneric = 0;
 
 /// Create a new state in which the value of the statement is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S,
+const LocationContext *LCtx,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the value is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SVal V,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SVal V,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the symbol is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SymbolRef Sym,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the pointer represented by the region
 /// is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const MemRegion *R,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State,
+const MemRegion *R,
+TaintTagType Kind = TaintTagGeneric);
+
+LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, SVal V);
+
+LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State,
+   const MemRegion *R);
+
+LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State,
+   SymbolRef Sym);
 
 /// Create a new state in a which a sub-region of a given symbol is tainted.
 /// This might be necessary when referring to regions that can not have an
 /// individual symbol, e.g. if they are represented by the default binding of
 /// a LazyCompoundVal.
-LLVM_NODISCARD ProgramStateRef
-addPartialTaint(ProgramStateRef State,
-SymbolRef ParentSym, const SubRegion *SubRegion,
-TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addPartialTaint(
+ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion,

[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-11-19 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

See also 
https://llvm.org/docs/DeveloperPolicy.html#current-contributors-transfering-from-svn.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-11-19 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

You're now supposed to push directly to github.
You might also have missed 
http://lists.llvm.org/pipermail/cfe-dev/2019-August/063219.html.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-11-16 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 marked 2 inline comments as done.
boga95 added a comment.

I did the required changes and tried to commit it, but I couldn't. I heard the 
codebase was migrated to GitHub. Maybe it affected my commit access.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-11-08 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus accepted this revision.
Szelethus added a comment.
This revision is now accepted and ready to land.

LGTM, provided that the inlines are addressed! Thanks!




Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:102-103
   /// system call etc.
-  bool checkPre(const CallExpr *CE, CheckerContext ) const;
+  bool checkPre(const CallExpr *CE, const FunctionDecl *FDecl, StringRef Name,
+CheckerContext ) const;
 

I recall that the current thinking is preferring `CallEvent`, though leave this 
as-is for now, because @steakhal might already have it locally.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:105
 
   /// Add taint sources on a pre-visit.
+  bool addSourcesPre(const CallExpr *CE, const FunctionDecl *FDecl,

We might as well explain what the return value here means (and for the other 
functions too).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-11-07 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ accepted this revision.
NoQ added a comment.

@NoQ: "Why not simply remove taint?"
@boga95: //*removes taint*//
@NoQ: "Hmm, now that i think about it, adding a 'no taint' marker might 
actually work correctly more often."

Like, if you have taint on `$x` and try to remove taint from `derived<$x, 
x.a>`, your current implementation will do nothing. But the approach with 
adding a 'no taint' marker will actually add a new marker and make subsequent 
lookups to `derived<$x, x.a>` will return the newly added marker, which is the 
correct behavior; additionally, `derived<$x, x.b>` would remain tainted (where 
`b != a`), which is also the correct behavior. It would have still failed when 
you describe the sub-region slightly differently, but that'd be a fairly minor 
glitch.

The right way to proceed further with the `removeTaint()` approach on 
`SymbolDerived` is to introduce `removePartialTaint()` that would complement 
`addPartialTaint()`. But that will require changing the data structure in the 
program state from a simple set of tainted sub-regions to a sophisticated tree 
of sub-regions that are marked up as tainted or not tainted. Which might have 
as well been a marker.

I still tend to believe that `removeTaint()` is the right approach, but it's a 
bit harder to get right and a bit worse if not enough effort is invested into 
it.

There definitely is, however, a good use for the `removeTaint()` function in 
both approaches: namely, our taint checker still lacks `checkDeadSymbols` :D




Comment at: clang/lib/StaticAnalyzer/Checkers/Taint.cpp:111-112
+ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
+  // If this is a symbol cast, remove the cast before adding the taint. Taint
+  // is cast agnostic.
+  while (const SymbolCast *SC = dyn_cast(Sym))

That's not entirely true. Like, if you check `(char)x`, you still have 24 bytes 
of `x` controlled by the attacker. But that's a good false-positive-proof 
approach.



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:474-476
+  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+  if (!FDecl || FDecl->getKind() != Decl::Function)
+return;

boga95 wrote:
> Szelethus wrote:
> > When do these happen? For implicit functions in C? 
> For example, a lambda doesn't have an FunctionDecl.
Lambda most certainly has a `FunctionDecl`, which is the declaration of its 
`operator()()`, and that's exactly what's going to be in `FDecl` if a lambda is 
invoked. However, the `getKind()` of this `FunctionDecl` will not be 
`Decl::Function` but `Decl::CXXMethod`, like of any other member function.

So this second check is checking that the function is a simple global function.

I recommend replacing with `!isa(FDecl)`, purely for 
readability, or at least adding a comment.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-10-15 Thread Balázs Benics via Phabricator via cfe-commits
steakhal added a comment.

I think this patch is ok.

Although there are remarks:

- I think the current implementation of the taint filtering functions does not 
follow the expected semantics. Now the modelling would remove taint before 
calling the function (//pre statement//). One might expect that the modelling 
would remove taint only on function return (//post statement//). This way we 
would not catch this:

  int myFilter(int* p) { // this function is stated as a filter function in the 
config file
int lookupTable[32] = {};
int value = lookupTable[*p]; // using a tainted value for indexing
escape(p);
return value;
  }

- I agree with @NoQ about the `testConfigurationFilter`, which now does not 
test the implementation but the behavior of the static analyzer if it 
conservatively invalidates in case of an unknown function call.
- I also think that we should have testcases for testing the filtering 
functionality of the config file. Eg. using the `myFilter1` could do the job 
here in the tests.

All in all, I still say that it seems to be a good patch.




Comment at: clang/test/Analysis/taint-generic.c:59
 
+#define bool int
+

Have you considered using `_Bool` instead?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-10-09 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 marked 2 inline comments as done.
boga95 added a comment.

Ping


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-10-03 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 marked 7 inline comments as done.
boga95 added a comment.

Ping




Comment at: clang/test/Analysis/taint-generic.c:393-397
+void testConfigurationFilter3() {
+  int x = mySource1();
+  myFilter3();
+  Buffer[x] = 1; // no-warning
+}

NoQ wrote:
> In this example `myFilter3` promises not to alter the value of `x` due to 
> `const`-qualifier on the pointee type of the parameter. Additionally, the 
> function has no chance of preventing the program from reaching the buffer 
> overflow line, other than crashing the program (given that it's C and there 
> are no exceptions). Therefore i'd say this is also a false negative.
> 
> A better motivational example that doesn't immediately look like a false 
> negative may look like this:
> ```lang=c
> void testConfigurationFilter3() {
>   int x = mySource1();
>   if (isOutOfRange(x)) // the filter function
> return;
>   Buffer[x] = 1; // no-warning
> }
> ```
> In this case the function looks at the value of `x` (there's really no need 
> to pass it by pointer) and notifies the caller through its return value that 
> it is unsafe to access the buffer with such index. This is probably the only 
> situation where a "filter" annotation is actually worth it.
It could work with C++ despite it hasn't supported any specific feature (yet).

I pass it by `const int*` because it currently doesn't support the value 
semantics :(. It has been already in my TODO list.



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:53-56
+  void Profile(llvm::FoldingSetNodeID ) const {
+ID.AddInteger(Arg);
+ID.AddInteger(TagType);
+  }

Szelethus wrote:
> Interesting, isn't this predefined for `std::pair`?
Unfortunately, it's not. 



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:474-476
+  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+  if (!FDecl || FDecl->getKind() != Decl::Function)
+return;

Szelethus wrote:
> When do these happen? For implicit functions in C? 
For example, a lambda doesn't have an FunctionDecl.



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:478-480
+  StringRef Name = C.getCalleeName(FDecl);
+  if (Name.empty())
+return;

Szelethus wrote:
> And this? Lambdas maybe?
I haven't found any example of this. It's just a copy-paste refactoring so I 
don't know the intentions behind that.

I think the checker should work perfectly without it.



Comment at: lib/StaticAnalyzer/Checkers/Taint.h:25-28
 using TaintTagType = unsigned;
 
-static constexpr TaintTagType TaintTagGeneric = 0;
+static constexpr TaintTagType TaintTagNotTainted = 0;
+static constexpr TaintTagType TaintTagGeneric = 1;

Szelethus wrote:
> Is there a **very** good reason behind us not using an `enum` instead?
Unfortunately, enums cannot put into FoldingSet. It has to be a primitive type 
or a struct with the necessary functions. I can use enums and cast them to 
unsigned and back, but it isn't convenient.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-09-16 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 220385.
boga95 marked 4 inline comments as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516

Files:
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.h
  clang/test/Analysis/Inputs/taint-generic-config.yaml
  clang/test/Analysis/taint-generic.c

Index: clang/test/Analysis/taint-generic.c
===
--- clang/test/Analysis/taint-generic.c
+++ clang/test/Analysis/taint-generic.c
@@ -56,6 +56,8 @@
 extern FILE *stdin;
 #endif
 
+#define bool int
+
 int fscanf(FILE *restrict stream, const char *restrict format, ...);
 int sprintf(char *str, const char *format, ...);
 void setproctitle(const char *fmt, ...);
@@ -346,6 +348,7 @@
 void myScanf(const char*, ...);
 int myPropagator(int, int*);
 int mySnprintf(char*, size_t, const char*, ...);
+bool isOutOfRange(const int*);
 void mySink(int, int, int);
 
 void testConfigurationSources1() {
@@ -372,6 +375,13 @@
   Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
 }
 
+void testConfigurationFilter() {
+  int x = mySource1();
+  if (isOutOfRange()) // the filter function
+return;
+  Buffer[x] = 1; // no-warning
+}
+
 void testConfigurationSinks() {
   int x = mySource1();
   mySink(x, 1, 2);
Index: clang/test/Analysis/Inputs/taint-generic-config.yaml
===
--- clang/test/Analysis/Inputs/taint-generic-config.yaml
+++ clang/test/Analysis/Inputs/taint-generic-config.yaml
@@ -36,8 +36,8 @@
 # A list of filter functions
 Filters:
   # int x; // x is tainted
-  # myFilter(); // x is not tainted anymore
-  - Name: myFilter
+  # isOutOfRange(); // x is not tainted anymore
+  - Name: isOutOfRange
 Args: [0]
 
 # A list of sink functions
Index: clang/lib/StaticAnalyzer/Checkers/Taint.h
===
--- clang/lib/StaticAnalyzer/Checkers/Taint.h
+++ clang/lib/StaticAnalyzer/Checkers/Taint.h
@@ -27,34 +27,39 @@
 static constexpr TaintTagType TaintTagGeneric = 0;
 
 /// Create a new state in which the value of the statement is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S,
+const LocationContext *LCtx,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the value is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SVal V,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SVal V,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the symbol is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SymbolRef Sym,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the pointer represented by the region
 /// is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const MemRegion *R,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State,
+const MemRegion *R,
+TaintTagType Kind = TaintTagGeneric);
+
+LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, SVal V);
+
+LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State,
+   const MemRegion *R);
+
+LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State,
+   SymbolRef Sym);
 
 /// Create a new state in a which a sub-region of a given symbol is tainted.
 /// This might be necessary when referring to regions that can not have an
 /// individual symbol, e.g. if they are represented by the default binding of
 /// a LazyCompoundVal.
-LLVM_NODISCARD ProgramStateRef
-addPartialTaint(ProgramStateRef State,
-SymbolRef ParentSym, const SubRegion *SubRegion,
-TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addPartialTaint(
+ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Check if the statement has a tainted value in the given state.
 bool isTainted(ProgramStateRef State, const Stmt *S,
@@ -99,4 +104,3 @@
 } // namespace clang
 
 #endif
-
Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp

[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-09-12 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:517
+if (V)
+  State = addTaint(State, *V, TaintTagNotTainted);
+  }

Why not simply remove taint?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-09-12 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

I'd like the test cases to actually demonstrate the correct use of the filters 
and the correct behavior of the Analyzer when the filters are annotated 
correctly, but it looks to me that they either demonstrate behavior when the 
annotation is //not// used correctly, or we disagree about how the taint should 
work in the first place. Testing the behavior when the annotation is misplaced 
is fine (with enough comments and probably FIXMEs where applicable), but 
"positive" tests are more valuable because they are the actual common cases 
(hopefully).




Comment at: clang/test/Analysis/taint-generic.c:381-385
+void testConfigurationFilter1() {
+  int x = mySource1();
+  myFilter1();
+  Buffer[x] = 1; // no-warning
+}

`myFilter1` will create a new conjured symbol within `x` when evaluated 
conservatively. In this case there clearly shouldn't be a warning on `Buffer[x] 
= 1`, because nobody marked the new symbol as tainted to begin with. Therefore 
i think that this test doesn't really test the new functionality.



Comment at: clang/test/Analysis/taint-generic.c:387-391
+void testConfigurationFilter2() {
+  int x = mySource1();
+  myFilter2();
+  Buffer[x] = 1; // no-warning
+}

Assuming `myFilter2` is inlined, we have two execution paths here. On one of 
them `x` is reset to a concrete 0. Because concrete values cannot carry taint 
to begin with, this execution path doesn't test the new functionality. On the 
other path `x` indeed has the old tainted value but it's now constrained to 
`[0, INT_MAX]`. Because it's still not constrained to be less than `BUFSIZE`, 
i'd say this looks more like a false negative and the new functionality makes 
the analysis worse. So this test does test the new functionality, but it kinda 
makes the new functionality look bad rather than good.



Comment at: clang/test/Analysis/taint-generic.c:393-397
+void testConfigurationFilter3() {
+  int x = mySource1();
+  myFilter3();
+  Buffer[x] = 1; // no-warning
+}

In this example `myFilter3` promises not to alter the value of `x` due to 
`const`-qualifier on the pointee type of the parameter. Additionally, the 
function has no chance of preventing the program from reaching the buffer 
overflow line, other than crashing the program (given that it's C and there are 
no exceptions). Therefore i'd say this is also a false negative.

A better motivational example that doesn't immediately look like a false 
negative may look like this:
```lang=c
void testConfigurationFilter3() {
  int x = mySource1();
  if (isOutOfRange(x)) // the filter function
return;
  Buffer[x] = 1; // no-warning
}
```
In this case the function looks at the value of `x` (there's really no need to 
pass it by pointer) and notifies the caller through its return value that it is 
unsafe to access the buffer with such index. This is probably the only 
situation where a "filter" annotation is actually worth it.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-09-12 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 219981.
boga95 marked 4 inline comments as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516

Files:
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.h
  clang/test/Analysis/Inputs/taint-generic-config.yaml
  clang/test/Analysis/taint-generic.c

Index: clang/test/Analysis/taint-generic.c
===
--- clang/test/Analysis/taint-generic.c
+++ clang/test/Analysis/taint-generic.c
@@ -346,6 +346,12 @@
 void myScanf(const char*, ...);
 int myPropagator(int, int*);
 int mySnprintf(char*, size_t, const char*, ...);
+void myFilter1(int*);
+void myFilter2(int* x) {
+  if (*x < 0)
+*x = 0;
+}
+void myFilter3(const int*);
 void mySink(int, int, int);
 
 void testConfigurationSources1() {
@@ -372,6 +378,24 @@
   Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
 }
 
+void testConfigurationFilter1() {
+  int x = mySource1();
+  myFilter1();
+  Buffer[x] = 1; // no-warning
+}
+
+void testConfigurationFilter2() {
+  int x = mySource1();
+  myFilter2();
+  Buffer[x] = 1; // no-warning
+}
+
+void testConfigurationFilter3() {
+  int x = mySource1();
+  myFilter3();
+  Buffer[x] = 1; // no-warning
+}
+
 void testConfigurationSinks() {
   int x = mySource1();
   mySink(x, 1, 2);
Index: clang/test/Analysis/Inputs/taint-generic-config.yaml
===
--- clang/test/Analysis/Inputs/taint-generic-config.yaml
+++ clang/test/Analysis/Inputs/taint-generic-config.yaml
@@ -36,8 +36,12 @@
 # A list of filter functions
 Filters:
   # int x; // x is tainted
-  # myFilter(); // x is not tainted anymore
-  - Name: myFilter
+  # myFilter1(); // x is not tainted anymore
+  - Name: myFilter1
+Args: [0]
+  - Name: myFilter2
+Args: [0]
+  - Name: myFilter3
 Args: [0]
 
 # A list of sink functions
Index: clang/lib/StaticAnalyzer/Checkers/Taint.h
===
--- clang/lib/StaticAnalyzer/Checkers/Taint.h
+++ clang/lib/StaticAnalyzer/Checkers/Taint.h
@@ -24,37 +24,35 @@
 /// taint.
 using TaintTagType = unsigned;
 
-static constexpr TaintTagType TaintTagGeneric = 0;
+static constexpr TaintTagType TaintTagNotTainted = 0;
+static constexpr TaintTagType TaintTagGeneric = 1;
 
 /// Create a new state in which the value of the statement is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S,
+const LocationContext *LCtx,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the value is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SVal V,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SVal V,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the symbol is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SymbolRef Sym,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the pointer represented by the region
 /// is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const MemRegion *R,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State,
+const MemRegion *R,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in a which a sub-region of a given symbol is tainted.
 /// This might be necessary when referring to regions that can not have an
 /// individual symbol, e.g. if they are represented by the default binding of
 /// a LazyCompoundVal.
-LLVM_NODISCARD ProgramStateRef
-addPartialTaint(ProgramStateRef State,
-SymbolRef ParentSym, const SubRegion *SubRegion,
-TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addPartialTaint(
+ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Check if the statement has a tainted value in the given state.
 bool isTainted(ProgramStateRef State, const Stmt *S,
@@ -99,4 +97,3 @@
 } // namespace clang
 
 #endif
-
Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp
===
--- 

[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-07-31 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus added a comment.

In general, the patch is looking alright, I'll take a second look later on. 
Don't mind my inlines too much, they are more directed towards the original 
code then your changes.




Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:53-56
+  void Profile(llvm::FoldingSetNodeID ) const {
+ID.AddInteger(Arg);
+ID.AddInteger(TagType);
+  }

Interesting, isn't this predefined for `std::pair`?



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:474-476
+  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+  if (!FDecl || FDecl->getKind() != Decl::Function)
+return;

When do these happen? For implicit functions in C? 



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:478-480
+  StringRef Name = C.getCalleeName(FDecl);
+  if (Name.empty())
+return;

And this? Lambdas maybe?



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:560
 
-  for (unsigned ArgNum : TaintArgs) {
+  for (auto ArgTypePair : TaintArgs) {
+unsigned ArgNum = ArgTypePair.Arg;

Please add the full type name, else it isn't obvious why we don't take it as a 
const reference.



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:561-562
+  for (auto ArgTypePair : TaintArgs) {
+unsigned ArgNum = ArgTypePair.Arg;
+TaintTagType TagType = ArgTypePair.TagType;
 // Special handling for the tainted return value.

We don't use `std::pair`, so we have descriptive field names, do we need these?



Comment at: lib/StaticAnalyzer/Checkers/Taint.h:25-28
 using TaintTagType = unsigned;
 
-static constexpr TaintTagType TaintTagGeneric = 0;
+static constexpr TaintTagType TaintTagNotTainted = 0;
+static constexpr TaintTagType TaintTagGeneric = 1;

Is there a **very** good reason behind us not using an `enum` instead?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-04-04 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 193705.
boga95 added a comment.

Rebase after https://reviews.llvm.org/D59861.
Fix custom filter test case: functions without definition always remove 
taintedness.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516

Files:
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Checkers/Taint.cpp
  lib/StaticAnalyzer/Checkers/Taint.h
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -296,13 +296,15 @@
 *(volatile int *) 0; // no-warning
 }
 
-
 // Test configuration
 int mySource1();
 void mySource2(int*);
 void myScanf(const char*, ...);
 int myPropagator(int, int*);
 int mySnprintf(char*, size_t, const char*, ...);
+void myFilter1(int* x) {}
+void myFilter2(const int*);
+void myFilter3(int);
 void mySink(int, int, int);
 
 void testConfigurationSources1() {
@@ -329,6 +331,24 @@
   Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
 }
 
+void testConfigurationFilter1() {
+  int x = mySource1();
+  myFilter1();
+  Buffer[x] = 1; // no-warning
+}
+
+void testConfigurationFilter2() {
+  int x = mySource1();
+  myFilter2(); // Pass by const int*
+  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void testConfigurationFilter3() {
+  int x = mySource1();
+  myFilter3(x); // Pass by value
+  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+}
+
 void testConfigurationSinks() {
   int x = mySource1();
   mySink(x, 1, 2); // expected-warning {{Untrusted data is passed to a user defined sink}}
Index: test/Analysis/Inputs/taint-generic-config.yaml
===
--- test/Analysis/Inputs/taint-generic-config.yaml
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -36,8 +36,8 @@
 # A list of filter functions
 Filters:
   # int x; // x is tainted
-  # myFilter(); // x is not tainted anymore
-  - Name: myFilter
+  # myFilter1(); // x is not tainted anymore
+  - Name: myFilter1
 Args: [0]
 
 # A list of sink functions
Index: lib/StaticAnalyzer/Checkers/Taint.h
===
--- lib/StaticAnalyzer/Checkers/Taint.h
+++ lib/StaticAnalyzer/Checkers/Taint.h
@@ -24,37 +24,35 @@
 /// taint.
 using TaintTagType = unsigned;
 
-static constexpr TaintTagType TaintTagGeneric = 0;
+static constexpr TaintTagType TaintTagNotTainted = 0;
+static constexpr TaintTagType TaintTagGeneric = 1;
 
 /// Create a new state in which the value of the statement is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S,
+const LocationContext *LCtx,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the value is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SVal V,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SVal V,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the symbol is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SymbolRef Sym,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the pointer represented by the region
 /// is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const MemRegion *R,
- TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State,
+const MemRegion *R,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in a which a sub-region of a given symbol is tainted.
 /// This might be necessary when referring to regions that can not have an
 /// individual symbol, e.g. if they are represented by the default binding of
 /// a LazyCompoundVal.
-LLVM_NODISCARD ProgramStateRef
-addPartialTaint(ProgramStateRef State,
-SymbolRef ParentSym, const SubRegion *SubRegion,
-TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addPartialTaint(
+ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion,
+TaintTagType Kind = TaintTagGeneric);
 
 /// Check if the statement has a tainted value in the given state.
 bool 

[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-03-26 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

Hi, i wanted to squeeze in D59861  somewhere 
in the middle of your work, would you mind?
I'll definitely have a look at your patches soon :)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-03-26 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 added a comment.

I add a new taint type, which represents a lack of taintedness. That's why I 
changed the name of `addTaint()` to `setTaint()`. Of course, it's not an 
important change, I can move it to another patch.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-03-25 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus requested changes to this revision.
Szelethus added a comment.
This revision now requires changes to proceed.

Same thing.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59516: [analyzer] Add custom filter functions for GenericTaintChecker

2019-03-21 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 191668.
boga95 retitled this revision from "[analyzer] Make GenericTaintChecker 
configurable" to "[analyzer] Add custom filter functions for 
GenericTaintChecker".

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516

Files:
  include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
  include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Core/ProgramState.cpp
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -296,13 +296,13 @@
 *(volatile int *) 0; // no-warning
 }
 
-
 // Test configuration
 int mySource1();
 void mySource2(int*);
 void myScanf(const char*, ...);
 int myPropagator(int, int*);
 int mySnprintf(char*, size_t, const char*, ...);
+void myFilter(int*);
 void mySink(int, int, int);
 
 void testConfigurationSources1() {
@@ -329,6 +329,12 @@
   Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
 }
 
+void testConfigurationFilter() {
+  int x = mySource1();
+  myFilter();
+  Buffer[x] = 1; // no-warning
+}
+
 void testConfigurationSinks() {
   int x = mySource1();
   mySink(x, 1, 2); // expected-warning {{Untrusted data is passed to a user defined sink}}
Index: lib/StaticAnalyzer/Core/ProgramState.cpp
===
--- lib/StaticAnalyzer/Core/ProgramState.cpp
+++ lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -658,20 +658,20 @@
   return true;
 }
 
-ProgramStateRef ProgramState::addTaint(const Stmt *S,
+ProgramStateRef ProgramState::setTaint(const Stmt *S,
const LocationContext *LCtx,
TaintTagType Kind) const {
   if (const Expr *E = dyn_cast_or_null(S))
 S = E->IgnoreParens();
 
-  return addTaint(getSVal(S, LCtx), Kind);
+  return setTaint(getSVal(S, LCtx), Kind);
 }
 
-ProgramStateRef ProgramState::addTaint(SVal V,
+ProgramStateRef ProgramState::setTaint(SVal V,
TaintTagType Kind) const {
   SymbolRef Sym = V.getAsSymbol();
   if (Sym)
-return addTaint(Sym, Kind);
+return setTaint(Sym, Kind);
 
   // If the SVal represents a structure, try to mass-taint all values within the
   // structure. For now it only works efficiently on lazy compound values that
@@ -685,22 +685,22 @@
   if (auto LCV = V.getAs()) {
 if (Optional binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) {
   if (SymbolRef Sym = binding->getAsSymbol())
-return addPartialTaint(Sym, LCV->getRegion(), Kind);
+return setPartialTaint(Sym, LCV->getRegion(), Kind);
 }
   }
 
   const MemRegion *R = V.getAsRegion();
-  return addTaint(R, Kind);
+  return setTaint(R, Kind);
 }
 
-ProgramStateRef ProgramState::addTaint(const MemRegion *R,
+ProgramStateRef ProgramState::setTaint(const MemRegion *R,
TaintTagType Kind) const {
   if (const SymbolicRegion *SR = dyn_cast_or_null(R))
-return addTaint(SR->getSymbol(), Kind);
+return setTaint(SR->getSymbol(), Kind);
   return this;
 }
 
-ProgramStateRef ProgramState::addTaint(SymbolRef Sym,
+ProgramStateRef ProgramState::setTaint(SymbolRef Sym,
TaintTagType Kind) const {
   // If this is a symbol cast, remove the cast before adding the taint. Taint
   // is cast agnostic.
@@ -712,7 +712,7 @@
   return NewState;
 }
 
-ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym,
+ProgramStateRef ProgramState::setPartialTaint(SymbolRef ParentSym,
   const SubRegion *SubRegion,
   TaintTagType Kind) const {
   // Ignore partial taint if the entire parent symbol is already tainted.
@@ -721,7 +721,7 @@
 
   // Partial taint applies if only a portion of the symbol is tainted.
   if (SubRegion == SubRegion->getBaseRegion())
-return addTaint(ParentSym, Kind);
+return setTaint(ParentSym, Kind);
 
   const TaintedSubRegions *SavedRegs = get(ParentSym);
   TaintedSubRegions Regs =
@@ -779,7 +779,7 @@
   continue;
 
 if (const TaintTagType *Tag = get(*SI)) {
-  if (*Tag == Kind)
+  if (*Tag != TaintTagNotTainted && *Tag == Kind)
 return true;
 }
 
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -31,6 +31,28 @@
 using namespace ento;
 
 namespace {
+/// A struct to store tainted argument and taint type as a pair in the program
+/// state.
+struct TaintArgTypePair {
+  unsigned Arg;
+  TaintTagType TagType;
+
+  bool