Revision: 75999
http://sourceforge.net/p/brlcad/code/75999
Author: brlcad
Date: 2020-06-02 06:35:28 +0000 (Tue, 02 Jun 2020)
Log Message:
-----------
upgrade flawfinder from 2.0.4 to 2.0.11 which adds 5 new detections
Modified Paths:
--------------
brlcad/trunk/misc/flawfinder
Modified: brlcad/trunk/misc/flawfinder
===================================================================
--- brlcad/trunk/misc/flawfinder 2020-06-01 19:37:37 UTC (rev 75998)
+++ brlcad/trunk/misc/flawfinder 2020-06-02 06:35:28 UTC (rev 75999)
@@ -14,7 +14,7 @@
#
# Currently this program can only analyze C/C++ code.
#
-# Copyright (C) 2001-2017 David A. Wheeler.
+# Copyright (C) 2001-2019 David A. Wheeler.
# This is released under the
# GNU General Public License (GPL) version 2 or later (GPL-2.0+):
#
@@ -55,7 +55,7 @@
import hashlib
# import formatter
-version = "2.0.4"
+version = "2.0.11"
# Program Options - these are the default values.
# TODO: Switch to boolean types where appropriate.
@@ -80,7 +80,7 @@
patch_file = "" # File containing (unified) diff output.
loadhitlist = None
savehitlist = None
-diffhitlist = None
+diffhitlist_filename = None
quiet = 0
showheading = 1 # --dataonly turns this off
output_format = 0 # 0 = normal, 1 = html.
@@ -91,6 +91,10 @@
required_regex = None # If non-None, regex that must be met to report
required_regex_compiled = None
+ERROR_ON_DISABLED_VALUE = 999
+error_level = ERROR_ON_DISABLED_VALUE # Level where we're return error code
+error_level_exceeded = False
+
displayed_header = 0 # Have we displayed the header yet?
num_ignored_hits = 0 # Number of ignored hits (used if never_ignore==0)
@@ -112,10 +116,7 @@
sloc = 0 # Physical SLOC
starttime = time.time() # Used to determine analyzed lines/second.
-line_beginning = re.compile(r'(?m)^')
-blank_line = re.compile(r'(?m)^\s+$')
-
# Send warning message. This is written this way to work on
# Python version 2.5 through Python 3.
def print_warning(message):
@@ -183,9 +184,9 @@
# Also, "newfile" can have " (comment)" postpended. Find and eliminate this.
# Note that the expression below is Y10K (and Y100K) ready. :-).
diff_findjunk = re.compile(
- r'^(?P<filename>.*)(' +
- r'(\s\d\d\d\d+-\d\d-\d\d\s+\d\d:\d[0-9:.]+Z?(\s+[\-\+0-9A-Z]+)?)|' +
- r'(\s[A-Za-z][a-z]+\s[A-za-z][a-z]+\s\d+\s\d+:\d[0-9:.]+Z?' +
+ r'^(?P<filename>.*)('
+ r'(\s\d\d\d\d+-\d\d-\d\d\s+\d\d:\d[0-9:.]+Z?(\s+[\-\+0-9A-Z]+)?)|'
+ r'(\s[A-Za-z][a-z]+\s[A-za-z][a-z]+\s\d+\s\d+:\d[0-9:.]+Z?'
r'(\s[\-\+0-9]*)?\s\d\d\d\d+)|'
r'(\s\(.*\)))\s*$'
)
@@ -222,10 +223,6 @@
return None
-git_splitter = ' b/'
-len_git_splitter = len(git_splitter)
-
-
def git_diff_get_filename(sLine):
return diff_git_filename.match(sLine)
@@ -242,7 +239,7 @@
hPatch = open(input_patch_file, 'r')
except BaseException:
print("Error: failed to open", h(input_patch_file))
- sys.exit(1)
+ sys.exit(10)
patched_filename = "" # Name of new file patched by current hunk.
@@ -256,7 +253,7 @@
fn_get_filename = gnu_diff_get_filename
else:
print("Error: Unrecognized patch format")
- sys.exit(1)
+ sys.exit(11)
while True: # Loop-and-half construct. Read a line, end loop when no more
@@ -268,7 +265,7 @@
if patched_filename in patch:
error("filename occurs more than once in the patch: %s" %
patched_filename)
- sys.exit(1)
+ sys.exit(12)
else:
patch[patched_filename] = {}
else:
@@ -276,10 +273,10 @@
if hunk_match:
if patched_filename == "":
error(
- "wrong type of patch file : " +
+ "wrong type of patch file : "
"we have a line number without having seen a filename"
)
- sys.exit(1)
+ sys.exit(13)
initial_number = hunk_match.group('linenumber')
line_counter = 0
else:
@@ -311,10 +308,12 @@
# Take s, and return legal (UTF-8) HTML.
return s.replace("&", "&").replace("<", "<").replace(">", ">")
+
def h(s):
# htmlize s if we're generating html, otherwise just return s.
return htmlize(s) if output_format else s
+
def print_multi_line(text):
# Print text as multiple indented lines.
width = 78
@@ -331,11 +330,12 @@
position = starting_position
print(' ', end='')
print(w, end='')
- position = position + len(w) + 1
+ position += len(w) + 1
+
# This matches references to CWE identifiers, so we can HTMLize them.
# We don't refer to CWEs with one digit, so we'll only match on 2+ digits.
-link_cwe_pattern = re.compile(r'(CWE-([1-9][0-9]+))([,()])')
+link_cwe_pattern = re.compile(r'(CWE-([1-9][0-9]+))([,()!/])')
# This matches the CWE data, including multiple entries.
find_cwe_pattern = re.compile(r'\(CWE-[^)]*\)')
@@ -394,6 +394,16 @@
def __getitem__(self, X): # Define this so this works: "%(line)" % hit
return getattr(self, X)
+ def __eq__(self, other):
+ return (self.filename == other.filename
+ and self.line == other.line
+ and self.column == other.column
+ and self.level == other.level
+ and self.name == other.name)
+
+ def __ne__(self, other):
+ return not self == other
+
# return CWEs
def cwes(self):
result = find_cwe_pattern.search(self.warning)
@@ -439,7 +449,7 @@
main_text = h("%(warning)s. " % self)
if output_format: # Create HTML link to CWE definitions
main_text = link_cwe_pattern.sub(
- r'<a
href="http://cwe.mitre.org/data/definitions/\2.html">\1</a>\3',
+ r'<a
href="https://cwe.mitre.org/data/definitions/\2.html">\1</a>\3',
main_text)
if single_line:
print(main_text, end='')
@@ -448,8 +458,8 @@
print(' ' + h(self.note), end='')
else:
if self.suggestion:
- main_text = main_text + h(self.suggestion) + ". "
- main_text = main_text + h(self.note)
+ main_text += h(self.suggestion) + ". "
+ main_text += h(self.note)
print()
print_multi_line(main_text)
if output_format:
@@ -476,13 +486,12 @@
if required_regex and (required_regex_compiled.search(hit.warning) is
None):
return
- if hit.level >= minimum_level:
- if linenumber == ignoreline:
- num_ignored_hits = num_ignored_hits + 1
- else:
- hitlist.append(hit)
- if show_immediately:
- hit.show()
+ if linenumber == ignoreline:
+ num_ignored_hits += 1
+ else:
+ hitlist.append(hit)
+ if show_immediately:
+ hit.show()
def internal_warn(message):
@@ -501,12 +510,12 @@
if text[i] == '(':
break
elif text[i] in string.whitespace:
- i = i + 1
+ i += 1
else:
return []
else: # Never found a reasonable ending.
return []
- i = i + 1
+ i += 1
parameters = [""] # Insert 0th entry, so 1st parameter is parameter[1].
currentstart = i
parenlevel = 1
@@ -526,11 +535,11 @@
# parse that deeply, we just need to know we'll stay
# in string mode:
elif c == '\\':
- i = i + 1
+ i += 1
elif incomment:
if c == '*' and text[i:i + 2] == '*/':
incomment = 0
- i = i + 1
+ i += 1
else:
if c == '"':
instring = 1
@@ -538,20 +547,20 @@
instring = 2
elif c == '/' and text[i:i + 2] == '/*':
incomment = 1
- i = i + 1
+ i += 1
elif c == '/' and text[i:i + 2] == '//':
while i < len(text) and text[i] != "\n":
- i = i + 1
+ i += 1
elif c == '\\' and text[i:i + 2] == '\\"':
- i = i + 1 # Handle exposed '\"'
+ i += 1 # Handle exposed '\"'
elif c == '(':
- parenlevel = parenlevel + 1
+ parenlevel += 1
elif c == ',' and (parenlevel == 1):
parameters.append(
p_trailingbackslashes.sub('',
text[currentstart:i]).strip())
currentstart = i + 1
elif c == ')':
- parenlevel = parenlevel - 1
+ parenlevel -= 1
if parenlevel <= 0:
parameters.append(
p_trailingbackslashes.sub(
@@ -565,9 +574,10 @@
"Parsing failed to find end of parameter list; "
"semicolon terminated it in %s" % text[pos:pos + 200])
return parameters
- i = i + 1
+ i += 1
internal_warn("Parsing failed to find end of parameter list in %s" %
text[pos:pos + 200])
+ return [] # Treat unterminated list as an empty list
# These patterns match gettext() and _() for internationalization.
@@ -577,8 +587,8 @@
# In practice, this doesn't seem to be a problem; gettext() is usually
# wrapped around the entire parameter.
# The ?s makes it posible to match multi-line strings.
-gettext_pattern = re.compile(r'(?s)^\s*' + 'gettext' + r'\s*\((.*)\)\s*$')
-undersc_pattern = re.compile(r'(?s)^\s*' + '_(T(EXT)?)?' + r'\s*\((.*)\)\s*$')
+gettext_pattern = re.compile(r'(?s)^\s*' 'gettext' r'\s*\((.*)\)\s*$')
+undersc_pattern = re.compile(r'(?s)^\s*' '_(T(EXT)?)?' r'\s*\((.*)\)\s*$')
def strip_i18n(text):
@@ -604,6 +614,7 @@
"Returns true if text is a C string with 0 or 1 character."
return 1 if p_c_singleton_string.search(text) else 0
+
# This string defines a C constant.
p_c_constant_string = re.compile(r'^\s*L?"([^\\]|\\[^0-6]|\\[0-6]+)*"$')
@@ -614,7 +625,20 @@
# Precompile patterns for speed.
+p_memcpy_sizeof = re.compile(r'sizeof\s*\(\s*([^)\s]*)\s*\)')
+p_memcpy_param_amp = re.compile(r'&?\s*(.*)')
+def c_memcpy(hit):
+ if len(hit.parameters) < 4: # 3 parameters
+ add_warning(hit)
+ return
+
+ m1 = re.search(p_memcpy_param_amp, hit.parameters[1])
+ m3 = re.search(p_memcpy_sizeof, hit.parameters[3])
+ if not m1 or not m3 or m1.group(1) != m3.group(1):
+ add_warning(hit)
+
+
def c_buffer(hit):
source_position = hit.source_position
if source_position <= len(hit.parameters) - 1:
@@ -628,7 +652,7 @@
add_warning(hit)
-p_dangerous_strncat = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+' +
+p_dangerous_strncat = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+'
r'\s*(\)\s*)?(-\s*1\s*)?$')
# This is a heuristic: constants in C are usually given in all
# upper case letters. Yes, this need not be true, but it's true often
@@ -666,7 +690,7 @@
hit.level = 5
hit.note = (
"Risk is high; the length parameter appears to be a constant, "
- + "instead of computing the number of characters left.")
+ "instead of computing the number of characters left.")
add_warning(hit)
return
c_buffer(hit)
@@ -744,9 +768,9 @@
elif p_low_risk_scanf_format.search(source):
# This is often okay, but sometimes extremely serious.
hit.level = 1
- hit.warning = ("It's unclear if the %s limit in the " +
+ hit.warning = ("It's unclear if the %s limit in the "
"format string is small enough (CWE-120)")
- hit.suggestion = ("Check that the limit is sufficiently " +
+ hit.suggestion = ("Check that the limit is sufficiently "
"small, or use a different input function")
else:
# No risky scanf request.
@@ -756,16 +780,16 @@
hit.note = "No risky scanf format detected."
else:
# Format isn't a constant.
- hit.note = ("If the scanf format is influenceable " +
+ hit.note = ("If the scanf format is influenceable "
"by an attacker, it's exploitable.")
add_warning(hit)
-p_dangerous_multi_byte = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+' +
+p_dangerous_multi_byte = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+'
r'\s*(\)\s*)?(-\s*1\s*)?$')
p_safe_multi_byte = re.compile(
- r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+\s*(\)\s*)?' +
- r'/\s*sizeof\s*\(\s*?[A-Za-z_$0-9]+\s*' + r'\[\s*0\s*\]\)\s*(-\s*1\s*)?$')
+ r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+\s*(\)\s*)?'
+ r'/\s*sizeof\s*\(\s*?[A-Za-z_$0-9]+\s*\[\s*0\s*\]\)\s*(-\s*1\s*)?$')
def c_multi_byte_to_wide_char(hit):
@@ -778,7 +802,7 @@
hit.level = 5
hit.note = (
"Risk is high, it appears that the size is given as bytes, but
the "
- + "function requires size as characters.")
+ "function requires size as characters.")
elif p_safe_multi_byte.search(num_chars_to_copy):
# This isn't really risk-free, since it might not be the
destination,
# or the destination might be a character array (if it's a char
pointer,
@@ -816,6 +840,14 @@
add_warning(hit) # Found a static array, warn about it.
+def cpp_unsafe_stl(hit):
+ # Use one of the overloaded classes from the STL in C++14 and higher
+ # instead of the <C++14 versions of theses functions that did not
+ # if the second iterator could overflow
+ if len(hit.parameters) <= 4:
+ add_warning(hit)
+
+
def normal(hit):
add_warning(hit)
@@ -850,7 +882,7 @@
"Consider using a function version that stops copying at the end of the
buffer",
"buffer", "", {}),
"memcpy|CopyMemory|bcopy":
- (normal, 2, # I've found this to have a lower risk in practice.
+ (c_memcpy, 2, # I've found this to have a lower risk in practice.
"Does not check for buffer overflows when copying to destination
(CWE-120)",
"Make sure destination can always hold the source data",
"buffer", "", {}),
@@ -865,7 +897,7 @@
"",
"buffer", "", {}),
# TODO: Do more analysis. Added because they're in MS banned list.
-
"StrCat|StrCatA|StrcatW|lstrcatA|lstrcatW|strCatBuff|StrCatBuffA|StrCatBuffW|StrCatChainW|_tccat|_mbccat|_ftcsat|StrCatN|StrCatNA|StrCatNW|StrNCat|StrNCatA|StrNCatW|lstrncat|lstrcatnA|lstrcatnW":
+
"StrCat|StrCatA|StrcatW|lstrcatA|lstrcatW|strCatBuff|StrCatBuffA|StrCatBuffW|StrCatChainW|_tccat|_mbccat|_ftcscat|StrCatN|StrCatNA|StrCatNW|StrNCat|StrNCatA|StrNCatW|lstrncat|lstrcatnA|lstrcatnW":
(normal, 4,
"Does not check for buffer overflows when concatenating to destination
[MS-banned] (CWE-120)",
"",
@@ -875,7 +907,7 @@
1, # Low risk level, because this is often used correctly when FIXING
security
# problems, and raising it to a higher risk level would cause many false
# positives.
- "Easily used incorrectly; doesn't always \\0-terminate or " +
+ "Easily used incorrectly; doesn't always \\0-terminate or "
"check for invalid pointers [MS-banned] (CWE-120)",
"",
"buffer", "", {}),
@@ -884,7 +916,7 @@
1, # Low risk level, because this is often used correctly when FIXING
security
# problems, and raising it to a higher risk levle would cause many false
# positives.
- "Easily used incorrectly; doesn't always \\0-terminate or " +
+ "Easily used incorrectly; doesn't always \\0-terminate or "
"check for invalid pointers [MS-banned] (CWE-120)",
"",
"buffer", "", {}),
@@ -911,9 +943,9 @@
"buffer", "", {}),
"char|TCHAR|wchar_t": # This isn't really a function call, but it works.
(c_static_array, 2,
- "Statically-sized arrays can be improperly restricted, " +
+ "Statically-sized arrays can be improperly restricted, "
"leading to potential overflows or other issues (CWE-119!/CWE-120)",
- "Perform bounds checking, use functions that limit length, " +
+ "Perform bounds checking, use functions that limit length, "
"or ensure that the size is larger than the maximum possible length",
"buffer", "", {'extract_lookahead': 1}),
@@ -943,7 +975,7 @@
# The "syslog" hook will raise "format" issues.
"syslog":
(c_printf, 4,
- "If syslog's format strings can be influenced by an attacker, " +
+ "If syslog's format strings can be influenced by an attacker, "
"they can be exploited (CWE-134)",
"Use a constant format string for syslog",
"format", "", {'format_position': 2}),
@@ -950,7 +982,7 @@
"snprintf|vsnprintf|_snprintf|_sntprintf|_vsntprintf":
(c_printf, 4,
- "If format strings can be influenced by an attacker, they can be " +
+ "If format strings can be influenced by an attacker, they can be "
"exploited, and note that sprintf variations do not always \\0-terminate
(CWE-134)",
"Use a constant for the format specification",
"format", "", {'format_position': 3}),
@@ -957,7 +989,7 @@
"scanf|vscanf|wscanf|_tscanf|vwscanf":
(c_scanf, 4,
- "The scanf() family's %s operation, without a limit specification, " +
+ "The scanf() family's %s operation, without a limit specification, "
"permits buffer overflows (CWE-120, CWE-20)",
"Specify a limit to %s, or use a different input function",
"buffer", "", {'input': 1}),
@@ -974,8 +1006,8 @@
# Often this isn't really a risk, and even when it is, at worst it
# often causes a program crash (and nothing worse).
1,
- "Does not handle strings that are not \\0-terminated; " +
- "if given one it may perform an over-read (it could cause a crash " +
+ "Does not handle strings that are not \\0-terminated; "
+ "if given one it may perform an over-read (it could cause a crash "
"if unprotected) (CWE-126)",
"",
"buffer", "", {}),
@@ -1001,10 +1033,10 @@
"realpath":
(normal, 3,
- "This function does not protect against buffer overflows, " +
+ "This function does not protect against buffer overflows, "
"and some implementations can overflow internally (CWE-120/CWE-785!)",
- "Ensure that the destination buffer is at least of size MAXPATHLEN, and" +
- "to protect against implementation problems, the input argument " +
+ "Ensure that the destination buffer is at least of size MAXPATHLEN, and"
+ "to protect against implementation problems, the input argument "
"should also be checked to ensure it is no larger than MAXPATHLEN",
"buffer", "dangers-c", {}),
@@ -1030,43 +1062,43 @@
"access": # ???: TODO: analyze TOCTOU more carefully.
(normal, 4,
- "This usually indicates a security flaw. If an " +
- "attacker can change anything along the path between the " +
- "call to access() and the file's actual use (e.g., by moving " +
+ "This usually indicates a security flaw. If an "
+ "attacker can change anything along the path between the "
+ "call to access() and the file's actual use (e.g., by moving "
"files), the attacker can exploit the race condition (CWE-362/CWE-367!)",
- "Set up the correct permissions (e.g., using setuid()) and " +
+ "Set up the correct permissions (e.g., using setuid()) and "
"try to open the file directly",
"race",
"avoid-race#atomic-filesystem", {}),
"chown":
(normal, 5,
- "This accepts filename arguments; if an attacker " +
+ "This accepts filename arguments; if an attacker "
"can move those files, a race condition results. (CWE-362)",
"Use fchown( ) instead",
"race", "", {}),
"chgrp":
(normal, 5,
- "This accepts filename arguments; if an attacker " +
+ "This accepts filename arguments; if an attacker "
"can move those files, a race condition results. (CWE-362)",
"Use fchgrp( ) instead",
"race", "", {}),
"chmod":
(normal, 5,
- "This accepts filename arguments; if an attacker " +
+ "This accepts filename arguments; if an attacker "
"can move those files, a race condition results. (CWE-362)",
"Use fchmod( ) instead",
"race", "", {}),
"vfork":
(normal, 2,
- "On some old systems, vfork() permits race conditions, and it's " +
+ "On some old systems, vfork() permits race conditions, and it's "
"very difficult to use correctly (CWE-362)",
"Use fork() instead",
"race", "", {}),
"readlink":
(normal, 5,
- "This accepts filename arguments; if an attacker " +
- "can move those files or change the link content, " +
- "a race condition results. " +
+ "This accepts filename arguments; if an attacker "
+ "can move those files or change the link content, "
+ "a race condition results. "
"Also, it does not terminate with ASCII NUL. (CWE-362, CWE-20)",
# This is often just a bad idea, and it's hard to suggest a
# simple alternative:
@@ -1112,7 +1144,7 @@
# Windows. TODO: Detect correct usage approaches and ignore it.
"GetTempFileName":
(normal, 3,
- "Temporary file race condition in certain cases " +
+ "Temporary file race condition in certain cases "
"(e.g., if run as SYSTEM in many versions of Windows) (CWE-377)",
"",
"tmpfile", "avoid-race", {}),
@@ -1121,7 +1153,7 @@
"execl|execlp|execle|execv|execvp|system|popen|WinExec|ShellExecute":
(normal, 4,
"This causes a new program to execute and is difficult to use safely
(CWE-78)",
- "try using a library call that implements the same functionality " +
+ "try using a library call that implements the same functionality "
"if available",
"shell", "", {}),
@@ -1138,16 +1170,16 @@
"CreateProcess":
(c_hit_if_null, 3,
"This causes a new process to execute and is difficult to use safely
(CWE-78)",
- "Specify the application path in the first argument, NOT as part of the
second, " +
+ "Specify the application path in the first argument, NOT as part of the
second, "
"or embedded spaces could allow an attacker to force a different program
to run",
"shell", "", {'check_for_null': 1}),
"atoi|atol|_wtoi|_wtoi64":
(normal, 2,
- "Unless checked, the resulting number can exceed the expected range " +
+ "Unless checked, the resulting number can exceed the expected range "
"(CWE-190)",
- "If source untrusted, check both minimum and maximum, even if the" +
- " input had no minus sign (large numbers can roll over into negative" +
+ "If source untrusted, check both minimum and maximum, even if the"
+ " input had no minus sign (large numbers can roll over into negative"
" number; consider saving to an unsigned value if that is intended)",
"integer", "dangers-c", {}),
@@ -1158,13 +1190,13 @@
"Use a more secure technique for acquiring random values",
"random", "", {}),
- "crypt":
+ "crypt|crypt_r":
(normal, 4,
- "Function crypt is a poor one-way hashing algorithm; " +
- "since it only accepts passwords of 8 characters or less, " +
- "and only a two-byte salt, it is excessively vulnerable to " +
+ "The crypt functions use a poor one-way hashing algorithm; "
+ "since they only accept passwords of 8 characters or fewer "
+ "and only a two-byte salt, they are excessively vulnerable to "
"dictionary attacks given today's faster computing equipment (CWE-327)",
- "Use a different algorithm, such as SHA-256, with a larger " +
+ "Use a different algorithm, such as SHA-256, with a larger, "
"non-repeating salt",
"crypto", "", {}),
@@ -1172,7 +1204,7 @@
"EVP_des_ecb|EVP_des_cbc|EVP_des_cfb|EVP_des_ofb|EVP_desx_cbc":
(normal, 4,
"DES only supports a 56-bit keysize, which is too small given today's
computers (CWE-327)",
- "Use a different patent-free encryption algorithm with a larger keysize,
" +
+ "Use a different patent-free encryption algorithm with a larger keysize, "
"such as 3DES or AES",
"crypto", "", {}),
@@ -1180,7 +1212,7 @@
"EVP_rc4_40|EVP_rc2_40_cbc|EVP_rc2_64_cbc":
(normal, 4,
"These keysizes are too small given today's computers (CWE-327)",
- "Use a different patent-free encryption algorithm with a larger keysize,
" +
+ "Use a different patent-free encryption algorithm with a larger keysize, "
"such as 3DES or AES",
"crypto", "", {}),
@@ -1187,31 +1219,30 @@
"chroot":
(normal, 3,
"chroot can be very helpful, but is hard to use correctly (CWE-250,
CWE-22)",
- "Make sure the program immediately chdir(\"/\")," +
- " closes file descriptors," +
- " and drops root privileges, and that all necessary files" +
+ "Make sure the program immediately chdir(\"/\"), closes file descriptors,"
+ " and drops root privileges, and that all necessary files"
" (and no more!) are in the new root",
"misc", "", {}),
"getenv|curl_getenv":
- (normal, 3, "Environment variables are untrustable input if they can be" +
- " set by an attacker. They can have any content and" +
+ (normal, 3, "Environment variables are untrustable input if they can be"
+ " set by an attacker. They can have any content and"
" length, and the same variable can be set more than once (CWE-807,
CWE-20)",
"Check environment variables carefully before using them",
"buffer", "", {'input': 1}),
"g_get_home_dir":
- (normal, 3, "This function is synonymous with 'getenv(\"HOME\")';" +
- "it returns untrustable input if the environment can be" +
- "set by an attacker. It can have any content and length, " +
+ (normal, 3, "This function is synonymous with 'getenv(\"HOME\")';"
+ "it returns untrustable input if the environment can be"
+ "set by an attacker. It can have any content and length, "
"and the same variable can be set more than once (CWE-807, CWE-20)",
"Check environment variables carefully before using them",
"buffer", "", {'input': 1}),
"g_get_tmp_dir":
- (normal, 3, "This function is synonymous with 'getenv(\"TMP\")';" +
- "it returns untrustable input if the environment can be" +
- "set by an attacker. It can have any content and length, " +
+ (normal, 3, "This function is synonymous with 'getenv(\"TMP\")';"
+ "it returns untrustable input if the environment can be"
+ "set by an attacker. It can have any content and length, "
"and the same variable can be set more than once (CWE-807, CWE-20)",
"Check environment variables carefully before using them",
"buffer", "", {'input': 1}),
@@ -1220,8 +1251,8 @@
# These are Windows-unique:
# TODO: Should have lower risk if the program checks return value.
- "RpcImpersonateClient|ImpersonateLoggedOnUser|CoImpersonateClient|" +
-
"ImpersonateNamedPipeClient|ImpersonateDdeClientWindow|ImpersonateSecurityContext|"
+
+ "RpcImpersonateClient|ImpersonateLoggedOnUser|CoImpersonateClient|"
+
"ImpersonateNamedPipeClient|ImpersonateDdeClientWindow|ImpersonateSecurityContext|"
"SetThreadToken":
(normal, 4, "If this call fails, the program could fail to drop heightened
privileges (CWE-250)",
"Make sure the return value is checked, and do not continue if a failure
is reported",
@@ -1244,7 +1275,7 @@
"SetSecurityDescriptorDacl":
(c_hit_if_null, 5,
- "Never create NULL ACLs; an attacker can set it to Everyone (Deny All
Access), " +
+ "Never create NULL ACLs; an attacker can set it to Everyone (Deny All
Access), "
"which would even forbid administrator access (CWE-732)",
"",
"misc", "", {'check_for_null': 3}),
@@ -1281,7 +1312,7 @@
"gsignal|ssignal":
(normal, 2,
- "These functions are considered obsolete on most systems, and very
non-poertable (Linux-based systems handle them radically different, basically
if gsignal/ssignal were the same as raise/signal respectively, while System V
considers them a separate set and obsolete) (CWE-676)",
+ "These functions are considered obsolete on most systems, and very
non-portable (Linux-based systems handle them radically different, basically if
gsignal/ssignal were the same as raise/signal respectively, while System V
considers them a separate set and obsolete) (CWE-676)",
"Switch to raise/signal, or some other signalling approach",
"obsolete", "", {}),
@@ -1309,6 +1340,15 @@
"Make sure input data is filtered, especially if an attacker could
manipulate it",
"input", "", {'input': 1}),
+ # Unsafe STL functions that don't check the second iterator
+ "equal|mismatch|is_permutation":
+ (cpp_unsafe_stl,
+ # Like strlen, this is mostly a risk to availability; at worst it
+ # often causes a program crash.
+ 1,
+ "Function does not check the second iterator for over-read conditions
(CWE-126)",
+ "This function is often discouraged by most C++ coding standards in favor
of its safer alternatives provided since C++14. Consider using a form of this
function that checks the second iterator before potentially overflowing it",
+ "buffer", "", {}),
# TODO: detect C++'s: cin >> charbuf, where charbuf is a char array; the
problem
# is that flawfinder doesn't have type information, and ">>" is safe
with
@@ -1350,7 +1390,7 @@
if c == '(':
return 1
elif c in string.whitespace:
- i = i + 1
+ i += 1
else:
if falsepositive:
return 0 # No following "(", presume invalid.
@@ -1390,7 +1430,7 @@
if hitlist[i].filename == filename and hitlist[i].line == linenumber:
del hitlist[i] # DESTROY - this is a DESTRUCTIVE iterator.
hitfound = 1 # Don't break, because there may be more than one.
- num_ignored_hits = num_ignored_hits + 1
+ num_ignored_hits += 1
if not hitfound:
ignoreline = linenumber + 1 # Nothing found - ignore next line.
@@ -1426,7 +1466,7 @@
linebegin = 1
codeinline = 0 # 1 when we see some code (so increment sloc at newline)
- if (patch_infos is not None) and (not f in patch_infos):
+ if (patch_infos is not None) and (f not in patch_infos):
# This file isn't in the patch list, so don't bother analyzing it.
if not quiet:
if output_format:
@@ -1442,13 +1482,13 @@
# Symlinks should never get here, but just in case...
if (not allowlink) and os.path.islink(f):
print("BUG! Somehow got a symlink in process_c_file!")
- num_links_skipped = num_links_skipped + 1
+ num_links_skipped += 1
return
try:
my_input = open(f, "r")
except BaseException:
print("Error: failed to open", h(f))
- sys.exit(1)
+ sys.exit(14)
# Read ENTIRE file into memory. Use readlines() to convert \n if
necessary.
# This turns out to be very fast in Python, even on large files, and it
@@ -1465,7 +1505,39 @@
print("Examining", f)
sys.stdout.flush()
- text = "".join(my_input.readlines())
+ # Python3 is often configured to use only UTF-8, and presumes
+ # that inputs cannot have encoding errors.
+ # The real world isn't like that, so provide a prettier warning
+ # in such cases - with some hints on how to solve it.
+ try:
+ text = "".join(my_input.readlines())
+ except UnicodeDecodeError as err:
+ print('Error: encoding error in', h(f))
+ print(err)
+ print()
+ print('Python3 requires input character data to be perfectly encoded;')
+ print('it also requires perfectly correct system encoding settings.')
+ print('Unfortunately, your data and/or system settings are not.')
+ print('Here are some options:')
+ print('1. Run: PYTHONUTF8=0 python3 flawfinder')
+ print(' if your system and and data are all properly set up for')
+ print(' a non-UTF-8 encoding.')
+ print('2. Run: PYTHONUTF8=0 LC_ALL=C.ISO-2022 python3 flawfinder')
+ print(' if your data has a specific encoding such as ISO-2022')
+ print(' (replace "ISO-2022" with the name of your encoding,')
+ print(' and optionally replace "C" with your native language).')
+ print('3. Run: PYTHONUTF8=0 LC_ALL=C.ISO-8859-1 python3 flawfinder')
+ print(' if your data has an unknown or inconsistent encoding')
+ print(' (ISO-8859-1 encoders normally allow anything).')
+ print('4. Convert all your source code to the UTF-8 encoding.')
+ print(' The system program "iconv" or Python program "cvt2utf" can')
+ print(' do this (for cvt2tuf, you can use "pip install cvt2utf").')
+ print('5. Run: python2 flawfinder')
+ print(' (That is, use Python 2 instead of Python 3).')
+ print('Some of these options may not work depending on circumstance.')
+ print('In the long term, we recommend using UTF-8 for source code.')
+ print('For more information, see the documentation.')
+ sys.exit(15)
i = 0
while i < len(text):
@@ -1495,15 +1567,15 @@
i = m.end(0)
continue
if c == "\n":
- linenumber = linenumber + 1
- sumlines = sumlines + 1
+ linenumber += 1
+ sumlines += 1
linebegin = 1
if codeinline:
- sloc = sloc + 1
+ sloc += 1
codeinline = 0
- i = i + 1
+ i += 1
continue
- i = i + 1 # From here on, text[i] points to next character.
+ i += 1 # From here on, text[i] points to next character.
if i < len(text):
nextc = text[i]
else:
@@ -1510,11 +1582,11 @@
nextc = ''
if incomment:
if c == '*' and nextc == '/':
- i = i + 1
+ i += 1
incomment = 0
elif instring:
if c == '\\' and (nextc != "\n"):
- i = i + 1
+ i += 1
elif c == '"' and instring == 1:
instring = 0
elif c == "'" and instring == 2:
@@ -1525,7 +1597,7 @@
i + 1) # Is there a directive here?
if m:
process_directive()
- i = i + 1
+ i += 1
incomment = 1
elif c == '/' and nextc == '/': # "//" comments - skip to EOL.
m = p_directive.match(text,
@@ -1533,7 +1605,7 @@
if m:
process_directive()
while i < len(text) and text[i] != "\n":
- i = i + 1
+ i += 1
elif c == '"':
instring = 1
codeinline = 1
@@ -1574,11 +1646,11 @@
elif p_digits.match(c):
while i < len(text) and p_digits.match(
text[i]): # Process a number.
- i = i + 1
+ i += 1
# else some other character, which we ignore.
# End of loop through text. Wrap up.
if codeinline:
- sloc = sloc + 1
+ sloc += 1
if incomment:
error("File ended while in comment.")
if instring:
@@ -1592,12 +1664,12 @@
# Note that this "for" loop modifies the ruleset while it's iterating,
# so we *must* convert the keys into a list before iterating.
for rule in list(ruleset.keys()):
- if "|" in rule: # We found a rule to expand.
+ if "|" in rule: # We found a rule to expand.
for newrule in rule.split("|"):
if newrule in ruleset:
print("Error: Rule %s, when expanded, overlaps %s" % (
rule, newrule))
- sys.exit(1)
+ sys.exit(15)
ruleset[newrule] = ruleset[rule]
del ruleset[rule]
# To print out the set of keys in the expanded ruleset, run:
@@ -1641,7 +1713,7 @@
if output_format:
print(
'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01
Transitional//EN" '
- + '"http://www.w3.org/TR/html4/loose.dtd">')
+ '"http://www.w3.org/TR/html4/loose.dtd">')
print("<html>")
print("<head>")
print('<meta http-equiv="Content-type" content="text/html;
charset=utf8">')
@@ -1652,10 +1724,10 @@
print("<body>")
print("<h1>Flawfinder Results</h1>")
print("Here are the security scan results from")
- print('<a href="http://www.dwheeler.com/flawfinder">Flawfinder
version %s</a>,' % version)
- print('(C) 2001-2017 <a href="http://www.dwheeler.com">David A.
Wheeler</a>.')
+ print('<a href="https://dwheeler.com/flawfinder">Flawfinder
version %s</a>,' % version)
+ print('(C) 2001-2019 <a href="https://dwheeler.com">David A.
Wheeler</a>.')
else:
- print("Flawfinder version %s, (C) 2001-2017 David A. Wheeler." %
version)
+ print("Flawfinder version %s, (C) 2001-2019 David A. Wheeler." %
version)
displayed_header = 1
@@ -1689,7 +1761,7 @@
if (not allowlink) and os.path.islink(f):
if not quiet:
print_warning("Skipping symbolic link directory " + h(f))
- num_links_skipped = num_links_skipped + 1
+ num_links_skipped += 1
return
base_filename = os.path.basename(f)
if (skipdotdir and len(base_filename) > 1
@@ -1696,7 +1768,7 @@
and (base_filename[0] == ".")):
if not quiet:
print_warning("Skipping directory with initial dot " + h(f))
- num_dotdirs_skipped = num_dotdirs_skipped + 1
+ num_dotdirs_skipped += 1
return
for dir_entry in os.listdir(f):
maybe_process_file(os.path.join(f, dir_entry), patch_infos)
@@ -1712,7 +1784,7 @@
if (not allowlink) and os.path.islink(f):
if not quiet:
print_warning("Skipping symbolic link file " + h(f))
- num_links_skipped = num_links_skipped + 1
+ num_links_skipped += 1
elif not os.path.isfile(f):
# Skip anything not a normal file. This is so that
# device files, etc. won't cause trouble.
@@ -1742,7 +1814,7 @@
if (not allowlink) and os.path.islink(f):
if not quiet:
print_warning("Skipping symbolic link " + h(f))
- num_links_skipped = num_links_skipped + 1
+ num_links_skipped += 1
elif os.path.isfile(f) or f == "-":
# If on the command line, FORCE processing of it.
# Currently, we only process C/C++.
@@ -1757,9 +1829,27 @@
maybe_process_file(f, patch_infos)
elif not os.path.exists(f):
if not quiet:
- if h(f).startswith("\342\210\222"):
+ # Help humans avoid a long mysterious debugging session.
+ # Sometimes people copy/paste from HTML that has a leading
+ # en dash (\u2013 aka 0xE2 0x80 0x93) or
+ # em dash (\u2014 aka 0xE2 0x80 0x94) instead of the
+ # correct dash marker (in an attempt to make things "pretty").
+ # These symbols *look* like the regular dash
+ # option marker, but they are not the same characters.
+ # If there's no such file, give a special warning,
+ # because otherwise this can be extremely
+ # difficult for humans to notice. We'll do the check in
+ # this odd way so it works on both Python 2 and Python 3.
+ # (Python 3 wants \u...).
+ # Note that we *only* make this report if the file doesn't
+ # exist - if someone asks to process a file with this crazy
+ # name, and it exists, we'll process it without complaint.
+ if (h(f).startswith("\xe2\x80\x93") or
+ h(f).startswith("\xe2\x80\x94") or
+ h(f).startswith(u"\u2013") or
+ h(f).startswith(u"\u2014")):
print_warning(
- "Skipping non-existent filename starting with UTF-8
long dash "
+ "Skipping non-existent filename starting with em dash
or en dash "
+ h(f))
else:
print_warning("Skipping non-existent file " + h(f))
@@ -1843,6 +1933,14 @@
--quiet | -Q
Don't display status information (i.e., which files are being
examined) while the analysis is going on.
+ --error-level=LEVEL
+ Return a nonzero (false) error code if there is at least one
+ hit of LEVEL or higher. If a diffhitlist is provided,
+ hits noted in it are ignored.
+ This option can be useful within a continuous integration script,
+ especially if you mark known-okay lines as "flawfinder: ignore".
+ Usually you want level to be fairly high, such as 4 or 5.
+ By default, flawfinder returns 0 (true) on a successful run.
Hitlist Management:
--savehitlist=F
@@ -1862,10 +1960,11 @@
global show_context, show_inputs, allowlink, skipdotdir, omit_time
global output_format, minimum_level, show_immediately, single_line
global csv_output, csv_writer
+ global error_level
global required_regex, required_regex_compiled
global falsepositive
global show_columns, never_ignore, quiet, showheading, list_rules
- global loadhitlist, savehitlist, diffhitlist
+ global loadhitlist, savehitlist, diffhitlist_filename
global patch_file
try:
# Note - as a side-effect, this sets sys.argv[].
@@ -1874,6 +1973,7 @@
"falsepositive", "falsepositives", "columns", "listrules",
"omittime", "allowlink", "patch=", "followdotdir", "neverignore",
"regex=", "quiet", "dataonly", "html", "singleline", "csv",
+ "error-level=",
"loadhitlist=", "savehitlist=", "diffhitlist=", "version", "help"
])
for (opt, value) in optlist:
@@ -1912,6 +2012,8 @@
quiet = 1
showheading = 0
csv_writer = csv.writer(sys.stdout)
+ elif opt == "--error-level":
+ error_level = int(value)
elif opt == "--immediate" or opt == "-i":
show_immediately = 1
elif opt == "-n" or opt == "--neverignore":
@@ -1940,7 +2042,7 @@
if showheading:
print("Saving hitlist to", value)
elif opt == "--diffhitlist":
- diffhitlist = value
+ diffhitlist_filename = value
display_header()
if showheading:
print("Showing hits not in", value)
@@ -1969,7 +2071,10 @@
except getopt.error as text:
print("*** getopt error:", text)
usage()
- sys.exit(1)
+ sys.exit(16)
+ if output_format == 1 and list_rules == 1:
+ print('You cannot list rules in HTML format')
+ sys.exit(20)
def process_files():
@@ -1990,19 +2095,23 @@
process_file_args(files, patch_infos)
return True
+
def hitlist_sort_key(hit):
"""Sort key for hitlist."""
return (-hit.level, hit.filename, hit.line, hit.column, hit.name)
+
def show_final_results():
global hitlist
+ global error_level_exceeded
count = 0
count_per_level = {}
count_per_level_and_up = {}
- possible_levels = [0, 1, 2, 3, 4, 5] # Eliminate dependency on range
+ # Name levels directly, to avoid Python "range" (a Python 2/3 difference)
+ possible_levels = (0, 1, 2, 3, 4, 5)
for i in possible_levels: # Initialize count_per_level
count_per_level[i] = 0
- for i in possible_levels: # Initialize count_per_level
+ for i in possible_levels: # Initialize count_per_level_and_up
count_per_level_and_up[i] = 0
if show_immediately or not quiet: # Separate the final results.
print()
@@ -2017,28 +2126,23 @@
# <ul> so that the format differentiates each entry.
# I'm not using <ol>, because its numbers might be confused with
# the risk levels or line numbers.
- if diffhitlist:
- diff_file = open(diffhitlist)
+ if diffhitlist_filename:
+ diff_file = open(diffhitlist_filename, 'rb')
diff_hitlist = pickle.load(diff_file)
- if output_format:
- print("<ul>")
- for hit in hitlist:
- if hit not in diff_hitlist:
+ if output_format:
+ print("<ul>")
+ for hit in hitlist:
+ if not diffhitlist_filename or hit not in diff_hitlist:
+ count_per_level[hit.level] = count_per_level[hit.level] + 1
+ if hit.level >= minimum_level:
hit.show()
- count_per_level[hit.level] = count_per_level[hit.level] + 1
- count = count + 1
- if output_format:
- print("</ul>")
+ count += 1
+ if hit.level >= error_level:
+ error_level_exceeded = True
+ if output_format:
+ print("</ul>")
+ if diffhitlist_filename:
diff_file.close()
- else:
- if output_format:
- print("<ul>")
- for hit in hitlist:
- hit.show()
- count_per_level[hit.level] = count_per_level[hit.level] + 1
- if output_format:
- print("</ul>")
- count = len(hitlist)
# Done with list, show the post-hitlist summary.
if showheading:
if output_format:
@@ -2127,11 +2231,11 @@
print("There may be other security vulnerabilities; review your code!")
if output_format:
print("<br>")
- print("See '<a
href=\"http://www.dwheeler.com/secure-programs\">Secure Programming HOWTO</a>'")
- print("(<a
href=\"http://www.dwheeler.com/secure-programs\">http://www.dwheeler.com/secure-programs</a>)
for more information.")
+ print("See '<a
href=\"https://dwheeler.com/secure-programs\">Secure Programming HOWTO</a>'")
+ print("(<a
href=\"https://dwheeler.com/secure-programs\">https://dwheeler.com/secure-programs</a>)
for more information.")
else:
print("See 'Secure Programming HOWTO'")
- print("(http://www.dwheeler.com/secure-programs) for more
information.")
+ print("(https://dwheeler.com/secure-programs) for more
information.")
if output_format:
print("</body>")
print("</html>")
@@ -2154,10 +2258,11 @@
if process_files():
show_final_results()
save_if_desired()
+ return 1 if error_level_exceeded else 0
if __name__ == '__main__':
try:
- flawfind()
+ sys.exit(flawfind())
except KeyboardInterrupt:
print("*** Flawfinder interrupted")
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
_______________________________________________
BRL-CAD Source Commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/brlcad-commits