https://github.com/dkrupp updated https://github.com/llvm/llvm-project/pull/66086
>From 889c886c3eed31335531ec61ad2b48bef15414d8 Mon Sep 17 00:00:00 2001 From: Daniel Krupp <daniel.kr...@ericsson.com> Date: Fri, 8 Sep 2023 16:57:49 +0200 Subject: [PATCH] [analyzer] TaintPropagation checker strlen() should not propagate strlen(..) call should not propagate taintedness, because it brings in many false positive findings. It is a common pattern to copy user provided input to another buffer. In these cases we always get warnings about tainted data used as the malloc parameter: buf = malloc(strlen(tainted_txt) + 1); // false warning This pattern can lead to a denial of service attack only, when the attacker can directly specify the size of the allocated area as an arbitrary large number (e.g. the value is converted from a user provided string). Later, we could reintroduce strlen() as a taint propagating function with the consideration not to emit warnings when the tainted value cannot be "arbitrarily large" (such as the size of an already allocated memory block). --- clang/docs/ReleaseNotes.rst | 7 +++++ clang/docs/analyzer/checkers.rst | 4 +-- .../Checkers/GenericTaintChecker.cpp | 7 ++--- .../test/Analysis/taint-diagnostic-visitor.c | 13 +++++----- clang/test/Analysis/taint-generic.c | 26 +++++++++++++------ 5 files changed, 38 insertions(+), 19 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3cdad2f7b9f0e5a..414cd7f62e2d764 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -409,6 +409,13 @@ Static Analyzer bitwise shift operators produce undefined behavior (because some operand is negative or too large). +- The ``alpha.security.taint.TaintPropagation`` checker no longer propagates + taint on ``strlen`` and ``strnlen`` calls, unless these are marked + explicitly propagators in the user-provided taint configuration file. + This removal empirically reduces the number of false positive reports. + Read the PR for the details. + (`#66086 <https://github.com/llvm/llvm-project/pull/66086>`_) + .. _release-notes-sanitizers: Sanitizers diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 54ea49e7426cc86..dbd6d7787823530 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -2599,8 +2599,8 @@ Default propagations rules: ``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``, ``qsort_r``, ``rawmemchr``, ``read``, ``recv``, ``recvfrom``, ``rindex``, ``strcasestr``, ``strchr``, ``strchrnul``, ``strcasecmp``, ``strcmp``, - ``strcspn``, ``strlen``, ``strncasecmp``, ``strncmp``, ``strndup``, - ``strndupa``, ``strnlen``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``, + ``strcspn``, ``strncasecmp``, ``strncmp``, ``strndup``, + ``strndupa``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``, ``strstr``, ``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``, ``toupper``, ``ttyname``, ``ttyname_r``, ``wctomb``, ``wcwidth`` diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index 5da0f34b3d0464f..dae8ff0c5c8f1b8 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -695,9 +695,10 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { {{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + // strlen, wcslen, strnlen and alike intentionally don't propagate taint. + // See the details here: https://github.com/llvm/llvm-project/pull/66086 + {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, {{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c index f1b9ceebdd9a6b8..8a7510177f3e444 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor.c +++ b/clang/test/Analysis/taint-diagnostic-visitor.c @@ -10,6 +10,7 @@ int scanf(const char *restrict format, ...); int system(const char *command); char* getenv( const char* env_var ); size_t strlen( const char* str ); +int atoi( const char* str ); void *malloc(size_t size ); void free( void *ptr ); char *fgets(char *str, int n, FILE *stream); @@ -54,11 +55,11 @@ void taintDiagnosticVLA(void) { // propagating through variables and expressions char *taintDiagnosticPropagation(){ char *pathbuf; - char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}} + char *size=getenv("SIZE"); // expected-note {{Taint originated here}} // expected-note@-1 {{Taint propagated to the return value}} - if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}} + if (size){ // expected-note {{Assuming 'size' is non-null}} // expected-note@-1 {{Taking true branch}} - pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} + pathbuf=(char*) malloc(atoi(size)); // expected-warning{{Untrusted data is used to specify the buffer size}} // expected-note@-1{{Untrusted data is used to specify the buffer size}} // expected-note@-2 {{Taint propagated to the return value}} return pathbuf; @@ -71,12 +72,12 @@ char *taintDiagnosticPropagation(){ char *taintDiagnosticPropagation2(){ char *pathbuf; char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source - char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}} + char *size=getenv("SIZE"); // expected-note {{Taint originated here}} // expected-note@-1 {{Taint propagated to the return value}} char *user_env=getenv("USER_ENV_VAR");//unrelated taint source - if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}} + if (size){ // expected-note {{Assuming 'size' is non-null}} // expected-note@-1 {{Taking true branch}} - pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} + pathbuf=(char*) malloc(atoi(size)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} // expected-note@-1{{Untrusted data is used to specify the buffer size}} // expected-note@-2 {{Taint propagated to the return value}} return pathbuf; diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index c674a03cfaec6c8..10a52cb44259551 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -443,14 +443,18 @@ int testSprintf_propagates_taint(char *buf, char *msg) { return 1 / x; // expected-warning {{Division by a tainted value, possibly zero}} } -void test_wchar_apis_propagate(const char *path) { +void test_wchar_apis_dont_propagate(const char *path) { + // strlen, wcslen, strnlen and alike intentionally don't propagate taint. + // See the details here: https://github.com/llvm/llvm-project/pull/66086 + // This isn't ideal, but this is only what we have now. + FILE *f = fopen(path, "r"); clang_analyzer_isTainted_charp((char*)f); // expected-warning {{YES}} wchar_t wbuf[10]; fgetws(wbuf, sizeof(wbuf)/sizeof(*wbuf), f); clang_analyzer_isTainted_wchar(*wbuf); // expected-warning {{YES}} int n = wcslen(wbuf); - clang_analyzer_isTainted_int(n); // expected-warning {{YES}} + clang_analyzer_isTainted_int(n); // expected-warning {{NO}} wchar_t dst[100] = L"ABC"; clang_analyzer_isTainted_wchar(*dst); // expected-warning {{NO}} @@ -458,7 +462,7 @@ void test_wchar_apis_propagate(const char *path) { clang_analyzer_isTainted_wchar(*dst); // expected-warning {{YES}} int m = wcslen(dst); - clang_analyzer_isTainted_int(m); // expected-warning {{YES}} + clang_analyzer_isTainted_int(m); // expected-warning {{NO}} } int scanf_s(const char *format, ...); @@ -948,21 +952,27 @@ void testStrndupa(size_t n) { } size_t strlen(const char *s); -void testStrlen() { +void testStrlen_dont_propagate() { + // strlen, wcslen, strnlen and alike intentionally don't propagate taint. + // See the details here: https://github.com/llvm/llvm-project/pull/66086 + // This isn't ideal, but this is only what we have now. char s[10]; scanf("%9s", s); size_t result = strlen(s); - clang_analyzer_isTainted_int(result); // expected-warning {{YES}} + // strlen propagating taint would bring in many false positives + clang_analyzer_isTainted_int(result); // expected-warning {{NO}} } size_t strnlen(const char *s, size_t maxlen); -void testStrnlen(size_t maxlen) { +void testStrnlen_dont_propagate(size_t maxlen) { + // strlen, wcslen, strnlen and alike intentionally don't propagate taint. + // See the details here: https://github.com/llvm/llvm-project/pull/66086 + // This isn't ideal, but this is only what we have now. char s[10]; scanf("%9s", s); - size_t result = strnlen(s, maxlen); - clang_analyzer_isTainted_int(result); // expected-warning {{YES}} + clang_analyzer_isTainted_int(result); // expected-warning {{NO}} } long strtol(const char *restrict nptr, char **restrict endptr, int base); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits