[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-08-13 Thread Alex Langford via Phabricator via cfe-commits
xiaobai added inline comments.



Comment at: cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:74
+TaintConfiguration() = default;
+TaintConfiguration(const TaintConfiguration &) = delete;
+TaintConfiguration(TaintConfiguration &&) = default;

Is there a particular reason this is marked as delete? This present an issue 
because the Optional constructed in `ento::registerGenericTaintChecker` will 
use this constructor. I don't hit this issue with any recent version of clang 
(tried with 6, 7, and 8) but I am hitting it with clang 3.8 on the swift-ci 
ubuntu bots.


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-28 Thread Borsik Gábor via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL367190: [analyzer] Add yaml parser to GenericTaintChecker 
(authored by boga95, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D59555?vs=212065=212094#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
  cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  cfe/trunk/lib/StaticAnalyzer/Checkers/Yaml.h
  cfe/trunk/test/Analysis/Inputs/taint-generic-config-ill-formed.yaml
  cfe/trunk/test/Analysis/Inputs/taint-generic-config-invalid-arg.yaml
  cfe/trunk/test/Analysis/Inputs/taint-generic-config.yaml
  cfe/trunk/test/Analysis/analyzer-config.c
  cfe/trunk/test/Analysis/taint-generic.c

Index: cfe/trunk/lib/StaticAnalyzer/Checkers/Yaml.h
===
--- cfe/trunk/lib/StaticAnalyzer/Checkers/Yaml.h
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/Yaml.h
@@ -0,0 +1,59 @@
+//== Yaml.h  -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines convenience functions for handling YAML configuration files
+// for checkers/packages.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKER_YAML_H
+#define LLVM_CLANG_LIB_STATICANALYZER_CHECKER_YAML_H
+
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "llvm/Support/YAMLTraits.h"
+
+namespace clang {
+namespace ento {
+
+/// Read the given file from the filesystem and parse it as a yaml file. The
+/// template parameter must have a yaml MappingTraits.
+/// Emit diagnostic error in case of any failure.
+template 
+llvm::Optional getConfiguration(CheckerManager , Checker *Chk,
+   StringRef Option, StringRef ConfigFile) {
+  if (ConfigFile.trim().empty())
+return None;
+
+  llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get();
+  llvm::ErrorOr> Buffer =
+  FS->getBufferForFile(ConfigFile.str());
+
+  if (std::error_code ec = Buffer.getError()) {
+Mgr.reportInvalidCheckerOptionValue(Chk, Option,
+"a valid filename instead of '" +
+std::string(ConfigFile) + "'");
+return None;
+  }
+
+  llvm::yaml::Input Input(Buffer.get()->getBuffer());
+  T Config;
+  Input >> Config;
+
+  if (std::error_code ec = Input.error()) {
+Mgr.reportInvalidCheckerOptionValue(Chk, Option,
+"a valid yaml file: " + ec.message());
+return None;
+  }
+
+  return Config;
+}
+
+} // namespace ento
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MOVE_H
Index: cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -15,16 +15,18 @@
 //===--===//
 
 #include "Taint.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "Yaml.h"
 #include "clang/AST/Attr.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
-#include 
-#include 
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/YAMLTraits.h"
+#include 
 #include 
 
 using namespace clang;
@@ -44,14 +46,51 @@
 
   void checkPreStmt(const CallExpr *CE, CheckerContext ) const;
 
-  void printState(raw_ostream , ProgramStateRef State,
-  const char *NL, const char *Sep) const override;
+  void printState(raw_ostream , ProgramStateRef State, const char *NL,
+  const char *Sep) const override;
 
-private:
-  static const unsigned InvalidArgIndex = UINT_MAX;
+  using ArgVector = SmallVector;
+  using SignedArgVector = SmallVector;
+
+  enum class VariadicType { None, Src, Dst };
+
+  /// Used to parse the configuration file.
+  struct TaintConfiguration {
+using NameArgsPair = std::pair;
+
+struct Propagation {
+  std::string Name;
+  ArgVector 

[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-27 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 212065.
boga95 marked 16 inline comments as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Checkers/Yaml.h
  test/Analysis/Inputs/taint-generic-config-ill-formed.yaml
  test/Analysis/Inputs/taint-generic-config-invalid-arg.yaml
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -1,5 +1,49 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -DFILE_IS_STRUCT \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=justguessit \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE
+
+// CHECK-INVALID-FILE: (frontend): invalid input for checker option
+// CHECK-INVALID-FILE-SAME:'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-FILE-SAME:that expects a valid filename instead of
+// CHECK-INVALID-FILE-SAME:'justguessit'
+
+// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-ILL-FORMED
+
+// CHECK-ILL-FORMED: (frontend): invalid input for checker option
+// CHECK-ILL-FORMED-SAME:'alpha.security.taint.TaintPropagation:Config',
+// CHECK-ILL-FORMED-SAME:that expects a valid yaml file: Invalid argument
+
+// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-ARG
+
+// CHECK-INVALID-ARG: (frontend): invalid input for checker option
+// CHECK-INVALID-ARG-SAME:'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-ARG-SAME:that expects an argument number for propagation
+// CHECK-INVALID-ARG-SAME:rules greater or equal to -1
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
Index: test/Analysis/Inputs/taint-generic-config.yaml
===
--- /dev/null
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -0,0 +1,50 @@
+# A list of source/propagation function
+Propagations:
+  # int x = mySource1(); // x is tainted
+  - Name: mySource1
+DstArgs:  [-1] # Index for return value
+
+  # int x;
+  # mySource2(); // x is tainted
+  - Name: mySource2
+DstArgs:  [0]
+
+  # int x, y;
+  # myScanf("%d %d", , ); // x and y are tainted
+  - Name:  myScanf
+VariadicType:  Dst
+VariadicIndex: 1
+
+  # int x; // x is tainted
+  # int y;
+  # myPropagator(x, ); // y is tainted
+  - Name: myPropagator
+SrcArgs:  [0]
+DstArgs:  [1]
+
+  # constexpr unsigned size = 100;
+  # char buf[size];
+  # int x, y;
+  # int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
+  # // the return value and the buf will be tainted
+  - Name:  mySnprintf
+SrcArgs:   [1]
+DstArgs:   [0, -1]
+VariadicType:  Src
+VariadicIndex: 3
+
+# A list of filter functions
+Filters:
+  # int x; // x is tainted
+  # myFilter(); // x is not tainted anymore
+  - Name: myFilter
+Args: [0]
+
+# A list of sink functions
+Sinks:
+  # int x, y; // x and y are tainted
+  # mySink(x, 0, 1); // It will warn
+  # mySink(0, 1, y); // It will warn
+  # mySink(0, x, 1); // It won't warn
+  - Name: mySink
+Args: [0, 2]
Index: test/Analysis/Inputs/taint-generic-config-invalid-arg.yaml

[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-21 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus accepted this revision.
Szelethus added a comment.

LGTM, thanks! Please mark inlines as done as you fix them. If you like the file 
description I proposed, I'm happy to commit this on your behalf (or you might 
as well apply for a commit access, since you have a track record of high 
quality patches already).




Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:9
+//
+// This file contains a simple interface allow to open and parse yaml files.
+//

Szelethus wrote:
> Is this actually the reason why we have this file? We already have a YAML 
> parser in LLVM, what's does this file do that the "default" parser doesn't?
I would appreciate some comments, it still isn't immediately obvious why we 
need yet another yaml file. How about

"This file defines convenience functions for handling YAML configuration files 
for checkers/packages".


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-19 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: test/Analysis/taint-generic.c:24
+// CHECK-INVALID-FILE-SAME:
'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-FILE-SAME:that expects a valid filename
 

Szelethus wrote:
> NoQ wrote:
> > Szelethus wrote:
> > > Could you please add the rest of the error message?
> > I'd rather remove the rest of the error message. There's no need to 
> > duplicate something that the user has already written on the command line.
> > 
> > Or do we think like \escapes?
> I'm not sure what \escapes mean -- however, if we emit the filename in the 
> error message, we should test it. Also, how many filenames has the user 
> written in the comand line? Include paths for this and that, output file, 
> inputfile, files to link against... I would very much prefer preserving the 
> current error message.
I'm thinking, like, the user is using zsh and writing `file#1.txt` instead of 
`file\#1.txt`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-18 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 210540.
boga95 marked 7 inline comments as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Checkers/Yaml.h
  test/Analysis/Inputs/taint-generic-config-ill-formed.yaml
  test/Analysis/Inputs/taint-generic-config-invalid-arg.yaml
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -1,5 +1,49 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -DFILE_IS_STRUCT \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=justguessit \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE
+
+// CHECK-INVALID-FILE: (frontend): invalid input for checker option
+// CHECK-INVALID-FILE-SAME:'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-FILE-SAME:that expects a valid filename instead of
+// CHECK-INVALID-FILE-SAME:'justguessit'
+
+// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-ILL-FORMED
+
+// CHECK-ILL-FORMED: (frontend): invalid input for checker option
+// CHECK-ILL-FORMED-SAME:'alpha.security.taint.TaintPropagation:Config',
+// CHECK-ILL-FORMED-SAME:that expects a valid yaml file: Invalid argument
+
+// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-ARG
+
+// CHECK-INVALID-ARG: (frontend): invalid input for checker option
+// CHECK-INVALID-ARG-SAME:'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-ARG-SAME:that expects an argument number for propagation
+// CHECK-INVALID-ARG-SAME:rules greater or equal to -1
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
Index: test/Analysis/Inputs/taint-generic-config.yaml
===
--- /dev/null
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -0,0 +1,50 @@
+# A list of source/propagation function
+Propagations:
+  # int x = mySource1(); // x is tainted
+  - Name: mySource1
+DstArgs:  [-1] # Index for return value
+
+  # int x;
+  # mySource2(); // x is tainted
+  - Name: mySource2
+DstArgs:  [0]
+
+  # int x, y;
+  # myScanf("%d %d", , ); // x and y are tainted
+  - Name:  myScanf
+VariadicType:  Dst
+VariadicIndex: 1
+
+  # int x; // x is tainted
+  # int y;
+  # myPropagator(x, ); // y is tainted
+  - Name: myPropagator
+SrcArgs:  [0]
+DstArgs:  [1]
+
+  # constexpr unsigned size = 100;
+  # char buf[size];
+  # int x, y;
+  # int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
+  # // the return value and the buf will be tainted
+  - Name:  mySnprintf
+SrcArgs:   [1]
+DstArgs:   [0, -1]
+VariadicType:  Src
+VariadicIndex: 3
+
+# A list of filter functions
+Filters:
+  # int x; // x is tainted
+  # myFilter(); // x is not tainted anymore
+  - Name: myFilter
+Args: [0]
+
+# A list of sink functions
+Sinks:
+  # int x, y; // x and y are tainted
+  # mySink(x, 0, 1); // It will warn
+  # mySink(0, 1, y); // It will warn
+  # mySink(0, x, 1); // It won't warn
+  - Name: mySink
+Args: [0, 2]
Index: test/Analysis/Inputs/taint-generic-config-invalid-arg.yaml

[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-18 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus added inline comments.



Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:48
+  return Config;
+}

Szelethus wrote:
> ```lang=c++
> } // end of namespace clang
> } // end of namespace ento
> ```
I mean, the other way around >.<



Comment at: test/Analysis/taint-generic.c:24
+// CHECK-INVALID-FILE-SAME:
'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-FILE-SAME:that expects a valid filename
 

NoQ wrote:
> Szelethus wrote:
> > Could you please add the rest of the error message?
> I'd rather remove the rest of the error message. There's no need to duplicate 
> something that the user has already written on the command line.
> 
> Or do we think like \escapes?
I'm not sure what \escapes mean -- however, if we emit the filename in the 
error message, we should test it. Also, how many filenames has the user written 
in the comand line? Include paths for this and that, output file, inputfile, 
files to link against... I would very much prefer preserving the current error 
message.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-17 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ accepted this revision.
NoQ added a comment.
This revision is now accepted and ready to land.

Looks great to me once @Szelethus's comments are addressed. Thanks!!




Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:34
+std::string(ConfigFile) + "'");
+return {};
+  }

I suggest `None` for being more explicit and readable.



Comment at: test/Analysis/taint-generic.c:24
+// CHECK-INVALID-FILE-SAME:
'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-FILE-SAME:that expects a valid filename
 

Szelethus wrote:
> Could you please add the rest of the error message?
I'd rather remove the rest of the error message. There's no need to duplicate 
something that the user has already written on the command line.

Or do we think like \escapes?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-17 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus added a comment.

Hmm, okay, so we convert `-1` from the config file to `UINT_MAX` in the code, I 
like it!

I wrote a couple nits but they really are just that. In general, for each 
different error message, a different test case would be great.




Comment at: include/clang/StaticAnalyzer/Checkers/Checkers.td:805
+  "Config",
+  "Specifies the name of the configuration file.",
+  "",

Okay, so how do I know what the format of that file is? How about we create 
another option (in a different revision) that dumps an example configuration 
with comments?



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:27
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
-#include 
-#include 
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringMap.h"

Do we need this include?



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:89-92
+  static const unsigned InvalidArgIndex{std::numeric_limits::max()};
   /// Denotes the return vale.
-  static const unsigned ReturnValueIndex = UINT_MAX - 1;
+  static const unsigned ReturnValueIndex{std::numeric_limits::max() -
+ 1};

Leave this as is for now, but maaybe in the future we should just use an enum. 
What do you think?



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:304-306
+  Mgr.reportInvalidCheckerOptionValue(
+  this, Option,
+  "an argument number for propagation rules greater or equal to -1");

Could we have a test for this too? :)



Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:1
+//== Yaml.h --- -*- C++ -*--=//
+//

Have with with the same length as the rest :)



Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:9
+//
+// This file contains a simple interface allow to open and parse yaml files.
+//

Is this actually the reason why we have this file? We already have a YAML 
parser in LLVM, what's does this file do that the "default" parser doesn't?



Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:14
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/YAMLTraits.h"

Hmmm, do we need this include?



Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:16
+#include "llvm/Support/YAMLTraits.h"
+
+/// Read the given file from the filesystem and parse it as a yaml file. The

```lang=c++
namespace clang {
namespace ento {
```



Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:42-43
+  if (std::error_code ec = Input.error()) {
+Mgr.reportInvalidCheckerOptionValue(Chk, Option,
+"a valid yaml file: " + ec.message());
+return {};

And for this too?



Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:48
+  return Config;
+}

```lang=c++
} // end of namespace clang
} // end of namespace ento
```



Comment at: test/Analysis/taint-generic.c:24
+// CHECK-INVALID-FILE-SAME:
'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-FILE-SAME:that expects a valid filename
 

Could you please add the rest of the error message?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-17 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 210260.
boga95 marked 2 inline comments as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Checkers/Yaml.h
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -1,5 +1,27 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -DFILE_IS_STRUCT \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=justguessit \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE
+
+// CHECK-INVALID-FILE: (frontend): invalid input for checker option
+// CHECK-INVALID-FILE-SAME:'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-FILE-SAME:that expects a valid filename
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
Index: test/Analysis/Inputs/taint-generic-config.yaml
===
--- /dev/null
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -0,0 +1,51 @@
+# A list of source/propagation function
+Propagations:
+  # int x = mySource1(); // x is tainted
+  - Name: mySource1
+DstArgs:  [-1] # Index for return value
+
+  # int x;
+  # mySource2(); // x is tainted
+  - Name: mySource2
+DstArgs:  [0]
+
+  # int x, y;
+  # myScanf("%d %d", , ); // x and y are tainted
+  - Name:  myScanf
+VariadicType:  Dst
+VariadicIndex: 1
+
+  # int x; // x is tainted
+  # int y;
+  # myPropagator(x, ); // y is tainted
+  - Name: myPropagator
+SrcArgs:  [0]
+DstArgs:  [1]
+
+  # constexpr unsigned size = 100;
+  # char buf[size];
+  # int x, y;
+  # int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
+  # // the return value and the buf will be tainted
+  - Name:  mySnprintf
+SrcArgs:   [1]
+DstArgs:   [0, -1]
+VariadicType:  Src
+VariadicIndex: 3
+
+# A list of filter functions
+Filters:
+  # int x; // x is tainted
+  # myFilter(); // x is not tainted anymore
+  - Name: myFilter
+Args: [0]
+
+# A list of sink functions
+Sinks:
+  # int x, y; // x and y are tainted
+  # mySink(x, 0, 1); // It will warn
+  # mySink(0, 1, y); // It will warn
+  # mySink(0, x, 1); // It won't warn
+  - Name: mySink
+Args: [0, 2]
+
Index: lib/StaticAnalyzer/Checkers/Yaml.h
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/Yaml.h
@@ -0,0 +1,48 @@
+//== Yaml.h --- -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains a simple interface allow to open and parse yaml files.
+//
+//===--===//
+
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/YAMLTraits.h"
+
+/// Read the given file from the filesystem and parse it as a yaml file. The
+/// template parameter must have a yaml MappingTraits.
+template 
+llvm::Optional getConfiguration(clang::ento::CheckerManager ,
+   Checker *Chk, llvm::StringRef Option,
+   llvm::StringRef ConfigFile) {
+  if (ConfigFile.trim().empty())
+return {};
+
+  llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get();
+  llvm::ErrorOr> Buffer =
+  FS->getBufferForFile(ConfigFile.str());
+
+  if 

[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-17 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 210258.
boga95 marked an inline comment as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Checkers/Yaml.h
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -1,5 +1,27 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -DFILE_IS_STRUCT \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-config \
+// RUN: alpha.security.taint.TaintPropagation:Config=justguessit \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE
+
+// CHECK-INVALID-FILE: (frontend): invalid input for checker option
+// CHECK-INVALID-FILE-SAME:'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-FILE-SAME:that expects a valid filename
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
Index: test/Analysis/Inputs/taint-generic-config.yaml
===
--- /dev/null
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -0,0 +1,51 @@
+# A list of source/propagation function
+Propagations:
+  # int x = mySource1(); // x is tainted
+  - Name: mySource1
+DstArgs:  [-1] # Index for return value
+
+  # int x;
+  # mySource2(); // x is tainted
+  - Name: mySource2
+DstArgs:  [0]
+
+  # int x, y;
+  # myScanf("%d %d", , ); // x and y are tainted
+  - Name:  myScanf
+VariadicType:  Dst
+VariadicIndex: 1
+
+  # int x; // x is tainted
+  # int y;
+  # myPropagator(x, ); // y is tainted
+  - Name: myPropagator
+SrcArgs:  [0]
+DstArgs:  [1]
+
+  # constexpr unsigned size = 100;
+  # char buf[size];
+  # int x, y;
+  # int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
+  # // the return value and the buf will be tainted
+  - Name:  mySnprintf
+SrcArgs:   [1]
+DstArgs:   [0, -1]
+VariadicType:  Src
+VariadicIndex: 3
+
+# A list of filter functions
+Filters:
+  # int x; // x is tainted
+  # myFilter(); // x is not tainted anymore
+  - Name: myFilter
+Args: [0]
+
+# A list of sink functions
+Sinks:
+  # int x, y; // x and y are tainted
+  # mySink(x, 0, 1); // It will warn
+  # mySink(0, 1, y); // It will warn
+  # mySink(0, x, 1); // It won't warn
+  - Name: mySink
+Args: [0, 2]
+
Index: lib/StaticAnalyzer/Checkers/Yaml.h
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/Yaml.h
@@ -0,0 +1,48 @@
+//== Yaml.h --- -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains a simple interface allow to open and parse yaml files.
+//
+//===--===//
+
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/YAMLTraits.h"
+
+/// Read the given file from the filesystem and parse it as a yaml file. The
+/// template parameter must have a yaml MappingTraits.
+template 
+llvm::Optional getConfiguration(clang::ento::CheckerManager ,
+   Checker *Chk, llvm::StringRef Option,
+   llvm::StringRef ConfigFile) {
+  if (ConfigFile.trim().empty())
+return {};
+
+  llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get();
+  llvm::ErrorOr> Buffer =
+  FS->getBufferForFile(ConfigFile.str());
+
+  if 

[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-16 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus added a comment.

Starting to look real good!




Comment at: include/clang/StaticAnalyzer/Checkers/Checkers.td:807
+  "",
+  Released>,
+  ]>,

We mark options that are not yet ready for used with `InAlpha`, rather then 
`Released`. I think it would be more fitting in this case!

Mind that if you'd like to list this option after that, you'd have to use
```lang=bash
clang -cc1 -analyzer-checker-option-help-alpha
```



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:58
+
+  /// The ``TaintConfiguration`` is used to parse configuration file.
+  struct TaintConfiguration {

Just simply `Used to parse the configuration file`.
https://llvm.org/docs/CodingStandards.html#doxygen-use-in-documentation-comments



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:302-303
+  Result.push_back(InvalidArgIndex);
+  llvm::errs() << "Invalid arg number for propagation rules: " << Arg
+   << '\n';
+} else

Do we emit an error for this case?



Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:1
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "llvm/ADT/APInt.h"

Header blurb (licence stuff etc)!



Comment at: test/Analysis/taint-generic.c:1-2
-// RUN: %clang_analyze_cc1  
-analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 
-Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT 
-analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 
-Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  
-analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 
-analyzer-config 
alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT 
-analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 
-analyzer-config 
alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 -Wno-format-security -verify %s
 

Could you please use line breaks here? I know it isn't your making, but if 
we're touching it anyways :^)

```
// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
// RUN:   -DFILE_IS_STRUCT \
// RUN:   -analyzer-checker=alpha.security.taint \
// RUN:   -analyzer-checker=core \
// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
// RUN:   -analyzer-config \
// RUN: 
alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
```

And also a testcase for an incorrect checker option:
```

// RUN: not %clang_analyze_cc1 -verify %s \
// RUN:   -analyzer-checker=alpha.security.taint \
// RUN:   -analyzer-config \
// RUN: alpha.security.taint.TaintPropagation:Config=justguessit \
// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE

// CHECK-INVALID-FILE: (frontend): invalid input for checker option
// CHECK-INVALID-FILE-SAME:
'alpha.security.taint.TaintPropagation:Config',
// CHECK-INVALID-FILE-SAME:that expects a valid yaml file
```
something like that.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-07-16 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 210073.
boga95 marked 11 inline comments as done.
boga95 added a comment.

Report diagnostic error in case of an invalid filename or an ill-formed yaml 
file.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Checkers/Yaml.h
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -1,5 +1,5 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify %s
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
Index: test/Analysis/Inputs/taint-generic-config.yaml
===
--- /dev/null
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -0,0 +1,51 @@
+# A list of source/propagation function
+Propagations:
+  # int x = mySource1(); // x is tainted
+  - Name: mySource1
+DstArgs:  [-1] # Index for return value
+
+  # int x;
+  # mySource2(); // x is tainted
+  - Name: mySource2
+DstArgs:  [0]
+
+  # int x, y;
+  # myScanf("%d %d", , ); // x and y are tainted
+  - Name:  myScanf
+VariadicType:  Dst
+VariadicIndex: 1
+
+  # int x; // x is tainted
+  # int y;
+  # myPropagator(x, ); // y is tainted
+  - Name: myPropagator
+SrcArgs:  [0]
+DstArgs:  [1]
+
+  # constexpr unsigned size = 100;
+  # char buf[size];
+  # int x, y;
+  # int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
+  # // the return value and the buf will be tainted
+  - Name:  mySnprintf
+SrcArgs:   [1]
+DstArgs:   [0, -1]
+VariadicType:  Src
+VariadicIndex: 3
+
+# A list of filter functions
+Filters:
+  # int x; // x is tainted
+  # myFilter(); // x is not tainted anymore
+  - Name: myFilter
+Args: [0]
+
+# A list of sink functions
+Sinks:
+  # int x, y; // x and y are tainted
+  # mySink(x, 0, 1); // It will warn
+  # mySink(0, 1, y); // It will warn
+  # mySink(0, x, 1); // It won't warn
+  - Name: mySink
+Args: [0, 2]
+
Index: lib/StaticAnalyzer/Checkers/Yaml.h
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/Yaml.h
@@ -0,0 +1,36 @@
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/YAMLTraits.h"
+
+/// Read the given file from the filesystem and parse it as a yaml file. The
+/// template parameter must have a yaml MappingTraits.
+template 
+llvm::Optional getConfiguration(clang::ento::CheckerManager ,
+   Checker *Chk, llvm::StringRef Option,
+   llvm::StringRef ConfigFile) {
+  if (ConfigFile.trim().empty())
+return {};
+
+  llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get();
+  llvm::ErrorOr> Buffer =
+  FS->getBufferForFile(ConfigFile.str());
+
+  if (std::error_code ec = Buffer.getError()) {
+Mgr.reportInvalidCheckerOptionValue(Chk, Option,
+"a valid filename instead of '" +
+std::string(ConfigFile) + "'");
+return {};
+  }
+
+  llvm::yaml::Input Input(Buffer.get()->getBuffer());
+  T Config;
+  Input >> Config;
+
+  if (std::error_code ec = Input.error()) {
+Mgr.reportInvalidCheckerOptionValue(Chk, Option,
+"a valid yaml file: " + ec.message());
+return {};
+  }
+
+  return Config;
+}
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -15,16 +15,19 @@
 //===--===//
 
 #include "Taint.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "Yaml.h"
 #include "clang/AST/Attr.h"
 #include "clang/Basic/Builtins.h"
+#include 

[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-06-06 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

In D59555#1514602 , @NoQ wrote:

> I'm still in doubts on how to connect your work with the `CallDescription` 
> effort. I'll think more about that.


I guess i'll just make a yaml reader for `CallDescription`s as soon as the 
interface settles down a bit, and then propose you to switch to using it.




Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:804-805
+  auto *Checker = mgr.registerChecker();
+  StringRef ConfigFile =
+  mgr.getAnalyzerOptions().getCheckerStringOption(Checker, "Config", "");
+  llvm::Optional Config =

I think i'll softly advocate for a more centralized format that doesn't require 
every checker to implement an option for just that purpose.

Will you be happy with a global analyzer flag, eg. `-analyzer-config 
api-yaml=/home/foo/analyzer.yaml` and then:
```lang=yaml
Checker:
Name: alpha.security.taint.TaintPropagation
Config:
Propagations:
...
```
with possibly multiple checkers in the same file? I guess we can change it 
later if you don't mind breaking flag compatibility.



Comment at: lib/StaticAnalyzer/Checkers/Yaml.h:16-17
+  if (std::error_code ec = Buffer.getError()) {
+llvm::errs() << "Error when getting TaintPropagation's config file '"
+ << ConfigFile << "': " << ec.message() << '\n';
+return {};

I believe we should emit a compile error-like diagnostic here. One of the good 
things about compile errors would be that GUIs like scan-build would notify 
their users about compile errors in a friendly manner, while dumps to 
`llvm::errs()` will be completely ignored.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-06-06 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 203339.

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Checkers/Yaml.h
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -1,5 +1,5 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify %s
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
Index: test/Analysis/Inputs/taint-generic-config.yaml
===
--- /dev/null
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -0,0 +1,51 @@
+# A list of source/propagation function
+Propagations:
+  # int x = mySource1(); // x is tainted
+  - Name: mySource1
+DstArgs:  [-1] # Index for return value
+
+  # int x;
+  # mySource2(); // x is tainted
+  - Name: mySource2
+DstArgs:  [0]
+
+  # int x, y;
+  # myScanf("%d %d", , ); // x and y are tainted
+  - Name:  myScanf
+VariadicType:  Dst
+VariadicIndex: 1
+
+  # int x; // x is tainted
+  # int y;
+  # myPropagator(x, ); // y is tainted
+  - Name: myPropagator
+SrcArgs:  [0]
+DstArgs:  [1]
+
+  # constexpr unsigned size = 100;
+  # char buf[size];
+  # int x, y;
+  # int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
+  # // the return value and the buf will be tainted
+  - Name:  mySnprintf
+SrcArgs:   [1]
+DstArgs:   [0, -1]
+VariadicType:  Src
+VariadicIndex: 3
+
+# A list of filter functions
+Filters:
+  # int x; // x is tainted
+  # myFilter(); // x is not tainted anymore
+  - Name: myFilter
+Args: [0]
+
+# A list of sink functions
+Sinks:
+  # int x, y; // x and y are tainted
+  # mySink(x, 0, 1); // It will warn
+  # mySink(0, 1, y); // It will warn
+  # mySink(0, x, 1); // It won't warn
+  - Name: mySink
+Args: [0, 2]
+
Index: lib/StaticAnalyzer/Checkers/Yaml.h
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/Yaml.h
@@ -0,0 +1,29 @@
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/YAMLTraits.h"
+
+/// Read the given file from the filesystem and parse it as a yaml file. The
+/// template parameter must have a yaml MappingTraits.
+template 
+llvm::Optional getConfiguration(llvm::StringRef ConfigFile) {
+  if (ConfigFile.trim().empty())
+return {};
+
+  llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get();
+  llvm::ErrorOr> Buffer =
+  FS->getBufferForFile(ConfigFile.str());
+
+  if (std::error_code ec = Buffer.getError()) {
+llvm::errs() << "Error when getting TaintPropagation's config file '"
+ << ConfigFile << "': " << ec.message() << '\n';
+return {};
+  }
+
+  llvm::yaml::Input Input(Buffer.get()->getBuffer());
+  T Config;
+  Input >> Config;
+
+  if (std::error_code ec = Input.error())
+return {};
+
+  return Config;
+}
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -15,16 +15,19 @@
 //===--===//
 
 #include "Taint.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "Yaml.h"
 #include "clang/AST/Attr.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
-#include 
-#include 
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/YAMLTraits.h"
+#include 
 #include 
 
 using namespace clang;
@@ -44,14 +47,49 @@
 
   void checkPreStmt(const CallExpr 

[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-06-06 Thread Gábor Horváth via Phabricator via cfe-commits
xazax.hun added a comment.

Generally looks good, some nits inline. Please mark comments you already solved 
as done.




Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:73-75
   static const unsigned InvalidArgIndex = UINT_MAX;
   /// Denotes the return vale.
   static const unsigned ReturnValueIndex = UINT_MAX - 1;

boga95 wrote:
> boga95 wrote:
> > Szelethus wrote:
> > > boga95 wrote:
> > > > Szelethus wrote:
> > > > > We should definitely change these, not only is the large integer 
> > > > > number impossible to remember, but this value could differ on 
> > > > > different platforms.
> > > > I tried to use int, but I got a lot of warnings because of the 
> > > > `getNumArgs()` returns an unsigned value.
> > > What warnings? I thought we have `-Wsign-conversion` disabled.
> > I got `-Wsign-compare` warnings, but it compiles. I will change it in the 
> > next [[ https://reviews.llvm.org/D59637 | review ]] because that's contains 
> > the yaml file and the related tests.
> Now, this is just for internal representation. The -1 value is mapped to this.
What about the C++ way using numeric limits?



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:52
+  using ArgVector = SmallVector;
+  using SignedArgVector = SmallVector;
+

Is there a way to have only one type for argument vectors?



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:312
+
+  if (std::error_code ec = Input.error()) {
+return;

We tend to not write the braces in small cases like this.



Comment at: test/Analysis/Inputs/taint-generic-config.yaml:16
+VarType:  Dst
+VarIndex: 1
+

Here Var stands for variadic I guess. I would prefer to avoid abbreviations as 
they might be misleading.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-05-23 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 marked 3 inline comments as done.
boga95 added a comment.

I already thought about it. It would make the code much cleaner, but it would 
have a little performance impact (Does it matter?).
It's straightforward to read the supported functions from another yaml file. 
Besides that, it can support multiple config files too.




Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:73-75
   static const unsigned InvalidArgIndex = UINT_MAX;
   /// Denotes the return vale.
   static const unsigned ReturnValueIndex = UINT_MAX - 1;

boga95 wrote:
> Szelethus wrote:
> > boga95 wrote:
> > > Szelethus wrote:
> > > > We should definitely change these, not only is the large integer number 
> > > > impossible to remember, but this value could differ on different 
> > > > platforms.
> > > I tried to use int, but I got a lot of warnings because of the 
> > > `getNumArgs()` returns an unsigned value.
> > What warnings? I thought we have `-Wsign-conversion` disabled.
> I got `-Wsign-compare` warnings, but it compiles. I will change it in the 
> next [[ https://reviews.llvm.org/D59637 | review ]] because that's contains 
> the yaml file and the related tests.
Now, this is just for internal representation. The -1 value is mapped to this.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-05-23 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

Ok! This looks like there isn't that much code, so it should be fine to 
duplicate. Do you have plans to eliminate the existing switch and instead use 
yaml for *all* supported functions while shipping a default .yaml file with the 
analyzer?

I'm still in doubts on how to connect your work with the `CallDescription` 
effort. I'll think more about that.




Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:210-218
+  /// Defines a map between the propagation function's name and
+  /// TaintPropagationRule.
+  static NameRuleMap CustomPropagations;
+
+  /// Defines a map between the filter function's name and filtering args.
+  static NameArgMap CustomFilters;
+

I think you don't need to make them static. By the time you need them, you 
already have access to the instance of the checker. Static variables of 
non-trivial types are frowned upon 
(https://llvm.org/docs/CodingStandards.html#do-not-use-static-constructors).



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:294
 
+void GenericTaintChecker::getConfiguration(StringRef ConfigFile) {
+  if (ConfigFile.trim().empty())

Let's re-use at least this function. I.e., abstract it out from 
`GenericTaintChecker` and put it into a header accessible by all checkers 
(dunno, `lib/StaticAnalyzer/Checkers/Yaml.h`).



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:316
+
+  parseConfiguration(std::move(Config));
+}

Do we ever need to copy the config? Maybe make it a move-only type?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-05-17 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 10.
boga95 edited the summary of this revision.
boga95 added a comment.

I changed the parsing, therefore the return value index is represented by -1.
I added a test configuration file and parse it when testing to prove the 
configuration doesn't break the code.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -1,5 +1,5 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify %s
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
Index: test/Analysis/Inputs/taint-generic-config.yaml
===
--- /dev/null
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -0,0 +1,51 @@
+# A list of source/propagation function
+Propagations:
+  # int x = mySource1(); // x is tainted
+  - Name: mySource1
+DstArgs:  [-1] # Index for return value
+
+  # int x;
+  # mySource2(); // x is tainted
+  - Name: mySource2
+DstArgs:  [0]
+
+  # int x, y;
+  # myScanf("%d %d", , ); // x and y are tainted
+  - Name: myScanf
+VarType:  Dst
+VarIndex: 1
+
+  # int x; // x is tainted
+  # int y;
+  # myPropagator(x, ); // y is tainted
+  - Name: myPropagator
+SrcArgs:  [0]
+DstArgs:  [1]
+
+  # constexpr unsigned size = 100;
+  # char buf[size];
+  # int x, y;
+  # int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
+  # // the return value and the buf will be tainted
+  - Name: mySnprintf
+SrcArgs:  [1]
+DstArgs:  [0, -1]
+VarType:  Src
+VarIndex: 3
+
+# A list of filter functions
+Filters:
+  # int x; // x is tainted
+  # myFilter(); // x is not tainted anymore
+  - Name: myFilter
+Args: [0]
+
+# A list of sink functions
+Sinks:
+  # int x, y; // x and y are tainted
+  # mySink(x, 0, 1); // It will warn
+  # mySink(0, 1, y); // It will warn
+  # mySink(0, x, 1); // It won't warn
+  - Name: mySink
+Args: [0, 2]
+
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -15,16 +15,17 @@
 //===--===//
 
 #include "Taint.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/AST/Attr.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/YAMLTraits.h"
 #include 
-#include 
 #include 
 
 using namespace clang;
@@ -44,14 +45,45 @@
 
   void checkPreStmt(const CallExpr *CE, CheckerContext ) const;
 
-  void printState(raw_ostream , ProgramStateRef State,
-  const char *NL, const char *Sep) const override;
+  void printState(raw_ostream , ProgramStateRef State, const char *NL,
+  const char *Sep) const override;
 
-private:
-  static const unsigned InvalidArgIndex = UINT_MAX;
+  using ArgVector = SmallVector;
+  using SignedArgVector = SmallVector;
+
+  enum class VariadicType { None, Src, Dst };
+
+  /// The ``TaintConfiguration`` is used to parse configuration file.
+  struct TaintConfiguration {
+using NameArgsPair = std::pair;
+
+struct Propagation {
+  std::string Name;
+  SignedArgVector SrcArgs;
+  SignedArgVector DstArgs;
+  VariadicType VarType;
+  unsigned VarIndex;
+};
+
+std::vector Propagations;
+std::vector Filters;
+std::vector Sinks;
+  };
+
+  /// Get and read the config file.
+  static void 

[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-05-16 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 added a comment.

Sorry for the late answer, I was working on my thesis which is about taint 
analysis. During that, I implemented several cool features (namespaces, 
std::cin, std::string, etc.) for the checker. I will share them soon.

I thought about the API notes and I think it will fit very well into the 
checker. If my understanding is clear, the checker would be configured with 
attributes and/or a yaml file which contains the attributes. Therefore, the 
implementation will become simpler, because the checker will only read the 
attributes. I made a draft for the possible usage of the attributes:

  [[taint::dst(-1)]]
  int mySource(); // The return value will become tainted
  
  [[taint::src(0, 1)]] [[taint::dst(2)]]
  void myPropagator(int*, int*, int*);
  
  [[taint::src(0)]] [[taint::varDst(2)]]
  int myFscanf(FILE*, const char*, ...); // The variadic arguments will become 
tainted, if the first argument is tainted
  
  [[taint::dst(0, -1)]] [[taint::varSrc(2)]]
  int mySprintf(char*, const char*, ... ); // The first argument and the return 
value will become tainted, if any of the variadic arguments is tainted

I think we can use the current yaml configuration in order to not block my 
progress. I think most of the current implementation can be reused for the API 
notes. I will be able to easily change the interface after the API notes are 
ready.




Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:73-75
   static const unsigned InvalidArgIndex = UINT_MAX;
   /// Denotes the return vale.
   static const unsigned ReturnValueIndex = UINT_MAX - 1;

Szelethus wrote:
> boga95 wrote:
> > Szelethus wrote:
> > > We should definitely change these, not only is the large integer number 
> > > impossible to remember, but this value could differ on different 
> > > platforms.
> > I tried to use int, but I got a lot of warnings because of the 
> > `getNumArgs()` returns an unsigned value.
> What warnings? I thought we have `-Wsign-conversion` disabled.
I got `-Wsign-compare` warnings, but it compiles. I will change it in the next 
[[ https://reviews.llvm.org/D59637 | review ]] because that's contains the yaml 
file and the related tests.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-04-12 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

This new approach is clearly useful to other checkers as well, not only the 
Taint checker. I believe we should strongly consider generalizing it somehow, 
it's just too awesome to restrict to a single checker.

There's also a closely related technology called "API Notes" that allows you to 
inject annotations into system headers by shipping .yaml files with the 
compiler. It was actively pushed for for the Analyzer in particular but ended 
up never getting upstreamed:

- http://lists.llvm.org/pipermail/cfe-dev/2015-December/046335.html
- http://lists.llvm.org/pipermail/cfe-dev/2017-May/053860.html

I suspect that it'll make perfect sense for your use case to define a clang 
attribute for taint sources and sinks and then define an API notes definition 
that'll inject this attribute into headers. If only API notes were available, 
this would have been a perfect use case for them. I think it might be a good 
idea to ping the mailing lists about API notes one more time, announce that you 
*are* going for a similar technology anyway, and see if anybody suggests 
something.

Pros/cons:

- The attributes/API notes solution is very comfy because it both allows users 
to address their false positives / false negatives by adding annotations in 
their source *and* allows us to annotate system headers via yaml files, and (if 
i understand correctly) both sorts of annotations are accessible uniformly via 
`Decl->getAttr<...>()`.
  - If we decide to go for our own yaml format that doesn't work on top of 
attributes, we'll either get only one of these, or double up our implementation 
burden in checkers.
- Implementation burden should not be that high, but it will be annoying.
  - If we don't want users to annotate their headers with source-level 
annotations, it sounds easier to go for a custom yaml parser, because defining 
attributes is annoying.
- But i believe we do want them to in this case.

Does any of this make any sense within the bigger plans that you have for this 
checker?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-04-10 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus added a comment.

Sorry, I rushed my earlier comment -- I definitely think that we should get rid 
of the `UINT_MAX` thing before landing this.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-04-10 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus added a comment.

We agreed to disagree on the static function stuff -- it doesn't really matter, 
and I don't insist. I have no objections against this patch, great job! I won't 
formally accept to make it stand out a little more. Thanks!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-04-01 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 updated this revision to Diff 193175.
boga95 marked an inline comment as done.
boga95 added a comment.

Rebase after https://reviews.llvm.org/D59861.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -15,14 +15,16 @@
 //===--===//
 
 #include "Taint.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/AST/Attr.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/YAMLTraits.h"
 #include 
 #include 
 #include 
@@ -47,11 +49,38 @@
   void printState(raw_ostream , ProgramStateRef State,
   const char *NL, const char *Sep) const override;
 
-private:
+  using ArgVector = SmallVector;
+
+  enum class VariadicType { None, Src, Dst };
+
+  /// The ``TaintConfiguration`` is used to parse configuration file.
+  struct TaintConfiguration {
+using NameArgsPair = std::pair;
+
+struct Propagation {
+  std::string Name;
+  ArgVector SrcArgs;
+  ArgVector DstArgs;
+  VariadicType VarType;
+  unsigned VarIndex;
+};
+
+std::vector Propagations;
+std::vector Filters;
+std::vector Sinks;
+  };
+
+  /// Get and read the config file.
+  static void getConfiguration(StringRef ConfigFile);
+
+  /// Parse the config.
+  static void parseConfiguration(TaintConfiguration &);
+
   static const unsigned InvalidArgIndex = UINT_MAX;
   /// Denotes the return vale.
   static const unsigned ReturnValueIndex = UINT_MAX - 1;
 
+private:
   mutable std::unique_ptr BT;
   void initBugType() const {
 if (!BT)
@@ -97,8 +126,6 @@
   bool generateReportIfTainted(const Expr *E, const char Msg[],
CheckerContext ) const;
 
-  using ArgVector = SmallVector;
-
   /// A struct used to specify taint propagation rules for a function.
   ///
   /// If any of the possible taint source arguments is tainted, all of the
@@ -109,8 +136,6 @@
   /// ReturnValueIndex is added to the dst list, the return value will be
   /// tainted.
   struct TaintPropagationRule {
-enum class VariadicType { None, Src, Dst };
-
 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
  CheckerContext );
 
@@ -131,8 +156,7 @@
 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
   PropagationFunc(nullptr) {}
 
-TaintPropagationRule(std::initializer_list &,
- std::initializer_list &,
+TaintPropagationRule(ArgVector &, ArgVector &,
  VariadicType Var = VariadicType::None,
  unsigned VarIndex = InvalidArgIndex,
  PropagationFuncType Func = nullptr)
@@ -176,6 +200,19 @@
 static bool postSocket(bool IsTainted, const CallExpr *CE,
CheckerContext );
   };
+
+  using NameRuleMap = llvm::StringMap;
+  using NameArgMap = llvm::StringMap;
+
+  /// Defines a map between the propagation function's name and
+  /// TaintPropagationRule.
+  static NameRuleMap CustomPropagations;
+
+  /// Defines a map between the filter function's name and filtering args.
+  static NameArgMap CustomFilters;
+
+  /// Defines a map between the sink function's name and sinking args.
+  static NameArgMap CustomSinks;
 };
 
 const unsigned GenericTaintChecker::ReturnValueIndex;
@@ -194,14 +231,105 @@
 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
 "for character data and the null terminator)";
 
+GenericTaintChecker::NameRuleMap GenericTaintChecker::CustomPropagations;
+
+GenericTaintChecker::NameArgMap GenericTaintChecker::CustomFilters;
+
+GenericTaintChecker::NameArgMap GenericTaintChecker::CustomSinks;
 } // end of anonymous namespace
 
+using TaintConfig = GenericTaintChecker::TaintConfiguration;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits {
+  static void mapping(IO , TaintConfig ) {
+IO.mapOptional("Propagations", Config.Propagations);
+IO.mapOptional("Filters", Config.Filters);
+IO.mapOptional("Sinks", Config.Sinks);
+  }
+};
+
+template 

[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-03-26 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus added inline comments.



Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:73-75
   static const unsigned InvalidArgIndex = UINT_MAX;
   /// Denotes the return vale.
   static const unsigned ReturnValueIndex = UINT_MAX - 1;

boga95 wrote:
> Szelethus wrote:
> > We should definitely change these, not only is the large integer number 
> > impossible to remember, but this value could differ on different platforms.
> I tried to use int, but I got a lot of warnings because of the `getNumArgs()` 
> returns an unsigned value.
What warnings? I thought we have `-Wsign-conversion` disabled.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-03-26 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus added a comment.

In D59555#1442331 , @boga95 wrote:

> Why is it better not to use `static` functions/variables? Has it any 
> performance impact?


I don't think so, I just think that it's easier to follow what's happening with 
a non-static field. Also, I just don't see why it needs to be static, you only 
use `GenericTaintChecker::Custom*` where you could easily access it. Especially 
`GenericTaintChecker::getConfiguration` -- if it wasn't static, you wouldn't 
even need to take the checker as a parameter.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-03-25 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 marked an inline comment as done.
boga95 added a comment.

Why is it better not to use `static` functions/variables? Has it any 
performance impact?




Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:73-75
   static const unsigned InvalidArgIndex = UINT_MAX;
   /// Denotes the return vale.
   static const unsigned ReturnValueIndex = UINT_MAX - 1;

Szelethus wrote:
> We should definitely change these, not only is the large integer number 
> impossible to remember, but this value could differ on different platforms.
I tried to use int, but I got a lot of warnings because of the `getNumArgs()` 
returns an unsigned value.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-03-25 Thread Kristóf Umann via Phabricator via cfe-commits
Szelethus added a comment.

I think the idea is awesome, thanks!

We really need to not use `UINT_MAX`, and I think `static` fields and member 
functions are a bit overused in this patch -- can't we just make 
`getConfiguration` and `parseConfiguration` static non-member functions, and 
make them take `GenericTaintChecker` as an argument?




Comment at: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:73-75
   static const unsigned InvalidArgIndex = UINT_MAX;
   /// Denotes the return vale.
   static const unsigned ReturnValueIndex = UINT_MAX - 1;

We should definitely change these, not only is the large integer number 
impossible to remember, but this value could differ on different platforms.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59555: [analyzer] Add yaml parser to GenericTaintChecker

2019-03-19 Thread Borsik Gábor via Phabricator via cfe-commits
boga95 created this revision.
boga95 added reviewers: Szelethus, xazax.hun, dkrupp, NoQ.
Herald added subscribers: cfe-commits, Charusso, donat.nagy, mikhail.ramalho, 
a.sidorin, rnkovacs, szepet, baloghadamsoftware, whisperity.
Herald added a project: clang.

Parse the yaml configuration file and store it in static variables. The user 
can define taint propagation rules, custom sink, and filter functions. E.g:

  # A list of source/propagation function
  Propagations:
# int x = mySource1(); // x is tainted
- Name: mySource1
  DstArgs:  [4294967294] # Index for return value
  
# int x;
# mySource2(); // x is tainted
- Name: mySource2
  DstArgs:  [0]
  
# int x, y;
# myScanf("%d %d", , ); // x and y are tainted
- Name: myScanf
  VarType:  Dst
  VarIndex: 1
  
# int x; // x is tainted
# int y;
# myPropagator(x, ); // y is tainted
- Name: myPropagator
  SrcArgs:  [0]
  DstArgs:  [1]
  
# const unsigned size = 100;
# char buf[size];
# int x, y;
# int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
# // the return value and the buf will be tainted
- Name: mySnprintf
  SrcArgs:  [1]
  DstArgs:  [0, 4294967294]
  VarType:  Src
  VarIndex: 3
  
  # A list of filter functions
  Filters:
# int x; // x is tainted
# myFilter(); // x is not tainted anymore
- Name: myFilter
  Args: [0]
  
  # A list of sink functions
  Sinks:
# int x, y; // x and y are tainted
# mySink(x, 0, 1); // It will warn
# mySink(0, 1, y); // It will warn
# mySink(0, x, 1); // It won't warn
- Name: mySink
  Args: [0, 2]


Repository:
  rC Clang

https://reviews.llvm.org/D59555

Files:
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -13,14 +13,16 @@
 // aggressively, even if the involved symbols are under constrained.
 //
 //===--===//
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/AST/Attr.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/YAMLTraits.h"
 #include 
 #include 
 #include 
@@ -41,11 +43,38 @@
 
   void checkPreStmt(const CallExpr *CE, CheckerContext ) const;
 
-private:
+  using ArgVector = SmallVector;
+
+  enum class VariadicType { None, Src, Dst };
+
+  /// The ``TaintConfiguration`` is used to parse configuration file.
+  struct TaintConfiguration {
+using NameArgsPair = std::pair;
+
+struct Propagation {
+  std::string Name;
+  ArgVector SrcArgs;
+  ArgVector DstArgs;
+  VariadicType VarType;
+  unsigned VarIndex;
+};
+
+std::vector Propagations;
+std::vector Filters;
+std::vector Sinks;
+  };
+
+  /// Get and read the config file.
+  static void getConfiguration(StringRef ConfigFile);
+
+  /// Parse the config.
+  static void parseConfiguration(TaintConfiguration &);
+
   static const unsigned InvalidArgIndex = UINT_MAX;
   /// Denotes the return vale.
   static const unsigned ReturnValueIndex = UINT_MAX - 1;
 
+private:
   mutable std::unique_ptr BT;
   void initBugType() const {
 if (!BT)
@@ -91,8 +120,6 @@
   bool generateReportIfTainted(const Expr *E, const char Msg[],
CheckerContext ) const;
 
-  using ArgVector = SmallVector;
-
   /// A struct used to specify taint propagation rules for a function.
   ///
   /// If any of the possible taint source arguments is tainted, all of the
@@ -103,8 +130,6 @@
   /// ReturnValueIndex is added to the dst list, the return value will be
   /// tainted.
   struct TaintPropagationRule {
-enum class VariadicType { None, Src, Dst };
-
 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
  CheckerContext );
 
@@ -125,8 +150,7 @@
 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
   PropagationFunc(nullptr) {}
 
-TaintPropagationRule(std::initializer_list &,
- std::initializer_list &,
+TaintPropagationRule(ArgVector &, ArgVector &,
  VariadicType Var = VariadicType::None,
  unsigned VarIndex = InvalidArgIndex,
  PropagationFuncType Func = nullptr)
@@ -170,6