Bug#1051900: ohcount: aborts with segfaul or bus error 90% of the time on arm64

2024-01-13 Thread Sylvestre Ledru

Hello

First, sorry for the latency.

I tried to upload it but two tests are failing. Does it ring a bell ? 
(they are regressions from the patch)




Error: test_diff(SourceFileTest):
  NoMethodError: undefined method `language' for nil:NilClass

          assert_equal "shell", deltas.first.language
            ^
/build/ohcount-4.0.0/test/unit/ruby/source_file_test.rb:12:in `test_diff'
  9:         assert_equal optimer, new.contents
 10:         deltas = old.diff(new).loc_deltas
 11:         assert_not_nil deltas
  => 12:         assert_equal "shell", deltas.first.language
 13:     end
 14:
 15:     def test_empty_diff

E

Error: test_empty_diff(SourceFileTest):
  NoMethodError: undefined method `language' for nil:NilClass

          assert_equal "perl", deltas.first.language
           ^
/build/ohcount-4.0.0/test/unit/ruby/source_file_test.rb:23:in 
`test_empty_diff'

 20:         assert_equal c, new.contents
 21:         deltas = old.diff(new).loc_deltas
 22:         assert_not_nil deltas
  => 23:         assert_equal "perl", deltas.first.language
 24:     end
 25: end

.


Thanks

Sylvestre



Bug#1051900: ohcount: aborts with segfaul or bus error 90% of the time on arm64

2023-09-15 Thread Michael Cree
Tags: patch

Further to my prior report it is not the use of variable length array
but the overwriting the end of the array that is the problem. This bug
is repeated multiple times throughout the source file src/detector.c.
It appears that there have been attempts to fix a couple of
occurrences of the bug but even that has been cocked up. There does
not seem to be any understanding by the authors of the code that
strlen() returns the length of a string less the terminating zero.

It is astonishing that this code successfully runs on any architecture
whatsoever given the multiple array overruns!

I attach a patch that fixes many occurrences of the bug.  This is
sufficient to get ohcount running on itself on arm64 without crashing.

There are also occurrences of array underruns in the code. For example,
about line 615 of src/detector.c there is the following:

p = line;
while (p < eol) {
  if (islower(*p) && p != bof && !isalnum(*(p - 1)) && *(p - 1) != '_') {

On the first iteration of the while loop in the if statement one byte
before the start of array line can be read.  That is undefined
behaviour.  I have not fixed this in the attached patch, but it should
be reported upstream.  Frankly, the code needs a full audit for issues
such as these.

Regards,
Michael.
--- a/src/detector.c
+++ b/src/detector.c
@@ -46,7 +46,7 @@
   while (isalnum(*pe)) pe++;
   length = pe - p;
   strncpy(buf, p, length);
-  buf[length] = '\0';
+  buf[79] = '\0';
   struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
   if (rl) return(rl->name);
 }
@@ -63,7 +63,7 @@
 } while (*p == '-'); // Skip over any switches.
 length = pe - p;
 strncpy(buf, p, length);
-buf[length] = '\0';
+buf[79] = '\0';
 struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
 if (rl) return(rl->name);
   } else if (strstr(line, "xml")) return(LANG_XML);
@@ -146,7 +146,7 @@
 while (pe < eof) {
   // Get the contents of the first line.
   while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
-  length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
+  length = (pe - p <= sizeof(line)-1) ? pe - p : sizeof(line)-1;
   strncpy(line, p, length);
   line[length] = '\0';
   if (*line == '#' && *(line + 1) == '!') {
@@ -166,7 +166,7 @@
   }
   pe = p;
   while (!isspace(*pe) && *pe != ';' && pe != strstr(pe, "-*-")) pe++;
-  length = (pe - p <= sizeof(buf)) ? pe - p : sizeof(buf);
+  length = (pe - p <= sizeof(buf)-1) ? pe - p : sizeof(buf)-1;
   strncpy(buf, p, length);
   buf[length] = '\0';
 
@@ -236,7 +236,7 @@
 if (p && pe) {
   p += 3;
   const int length = pe - p;
-  char buf[length];
+  char buf[length+1];
   strncpy(buf, p, length);
   buf[length] = '\0';
   char *eol = buf + strlen(buf);
@@ -550,7 +550,7 @@
   // If the directory contains a matching *.m file, likely Objective C.
   length = strlen(sourcefile->filename);
   if (strcmp(sourcefile->ext, "h") == 0) {
-char path[length];
+char path[length+1];
 strncpy(path, sourcefile->filename, length);
 path[length] = '\0';
 *(path + length - 1) = 'm';
@@ -572,7 +572,7 @@
   while (pe < eof) {
 // Get a line at a time.
 while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
-length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
+length = (pe - p <= sizeof(line)-1) ? pe - p : sizeof(line)-1;
 strncpy(line, p, length);
 line[length] = '\0';
 char *eol = line + strlen(line);
@@ -594,7 +594,7 @@
   while (pe < eol && *pe != '>' && *pe != '"') pe++;
   length = pe - p;
   strncpy(buf, p, length);
-  buf[length] = '\0';
+  buf[80] = '\0';
   if (ohcount_hash_is_cppheader(buf, length))
 return LANG_CPP;
   // Is the extension for the header file a C++ file?
@@ -602,7 +602,7 @@
   while (p > line && *(p - 1) != '.') p--;
   length = pe - p;
   strncpy(buf, p, length);
-  buf[length] = '\0';
+  buf[80] = '\0';
   struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
   if (re && strcmp(re->value, LANG_CPP) == 0)
 return LANG_CPP;
@@ -619,7 +619,7 @@
 if (!isalnum(*pe) && *pe != '_') {
   length = pe - p;
   strncpy(buf, p, length);
-  buf[length] = '\0';
+  buf[80] = '\0';
   if (strcmp(buf, "class") == 0 ||
   strcmp(buf, "namespace") == 0 ||
   strcmp(buf, "template") == 0 ||
@@ -650,7 +650,7 @@
   if (strstr(p, ".") <= pe) {
 // Only if the filename has an extension prior to the .in
 length = pe - p;
-char buf[length];
+char buf[length+1];
 strncpy(buf, p, length);
 buf[length] = '\0';
 p = ohcount_sourcefile_get_contents(sourcefile);
@@ -741,7 +741,7 @@
   while (pe < eof) {
 // Get a line at a time.
 while (

Bug#1051900: ohcount: aborts with segfaul or bus error 90% of the time on arm64

2023-09-15 Thread Michael Cree
On Wed, Sep 13, 2023 at 06:52:08PM -0300, Antonio Terceiro wrote:
> ohcount segfaults (and sometimes aborts with a Bus error) on arm64,
> almost 90% of the time. I tried this on an up to date arm64 Debian

Running ohcount under gdb traps on the segfault but can't get a
backtrace due to a corrupted stack.  So I recompiled ohcount with
the address sanitiser which traps on the segfault with the following:

=
==14540==ERROR: AddressSanitizer: dynamic-stack-buffer-overflow on address 
0xeab309b4 at pc 0xacf8bd38 bp 0xeab30960 sp 0xeab30978
WRITE of size 1 at 0xeab309b4 thread T0
#0 0xacf8bd34 in disambiguate_aspx src/detector.c:241
#1 0xacf8ba80 in ohcount_detect_language src/detector.c:221
#2 0xacf87304 in ohcount_sourcefile_get_language src/sourcefile.c:128
#3 0xad1fb5d0 in ohcount_parse src/parser.c:16
#4 0xacf879cc in ohcount_sourcefile_parse src/sourcefile.c:195
#5 0xacf87be0 in ohcount_sourcefile_get_loc_list src/sourcefile.c:239
#6 0xacf88f48 in ohcount_sourcefile_list_analyze_languages 
src/sourcefile.c:404
#7 0xacf8582c in summary src/ohcount.c:210
#8 0xacf86394 in main src/ohcount.c:302
#9 0xa95f777c in __libc_start_call_main 
../sysdeps/nptl/libc_start_call_main.h:58
#10 0xa95f7854 in __libc_start_main_impl ../csu/libc-start.c:360
#11 0xacf840ac in _start 
(/home/mjc/debian/ohcount/ohcount-4.0.0/bin/ohcount+0x240ac)

Address 0xeab309b4 is located in stack of thread T0
SUMMARY: AddressSanitizer: dynamic-stack-buffer-overflow src/detector.c:241 in 
disambiguate_aspx
Shadow bytes around the buggy address:
  0x200ffd5660e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200ffd5660f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200ffd566100: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200ffd566110: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200ffd566120: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
=>0x200ffd566130: ca ca ca ca 00 00[04]cb cb cb cb cb 00 00 00 00
  0x200ffd566140: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200ffd566150: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200ffd566160: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200ffd566170: 00 00 00 00 f1 f1 f1 f1 00 00 00 00 00 00 00 00
  0x200ffd566180: 00 00 01 f2 f2 f2 f2 f2 00 00 00 00 00 00 00 00
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:   00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:   fa
  Freed heap region:   fd
  Stack left redzone:  f1
  Stack mid redzone:   f2
  Stack right redzone: f3
  Stack after return:  f5
  Stack use after scope:   f8
  Global redzone:  f9
  Global init order:   f6
  Poisoned by user:f7
  Container overflow:  fc
  Array cookie:ac
  Intra object redzone:bb
  ASan internal:   fe
  Left alloca redzone: ca
  Right alloca redzone:cb
==14540==ABORTING

The code for disambiguate_aspx() where the segfaults occurs is:


const char *disambiguate_aspx(SourceFile *sourcefile) {
  char *p = ohcount_sourcefile_get_contents(sourcefile);
  char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
  for (; p < eof; p++) {
// /<%@\s*Page[^>]+Language="VB"[^>]+%>/
p = strstr(p, "<%@");
if (!p)
break;
char *pe = strstr(p, "%>");
if (p && pe) {
  p += 3;
  const int length = pe - p;
  char buf[length];
  strncpy(buf, p, length);
  buf[length] = '\0';
  char *eol = buf + strlen(buf);
  for (p = buf; p < eol; p++) *p = tolower(*p);
  p = buf;
  while (*p == ' ' || *p == '\t') p++;
  if (strncmp(p, "page", 4) == 0) {
p += 4;
if (strstr(p, "language=\"vb\""))
  return LANG_VB_ASPX;
  }
}
  }
  return LANG_CS_ASPX;
}


Line 241 is the line with: buf[length] = '\0';

We see that buf is declared two lines above as a variable length
array. Being a local variable I assume that it is allocated on the
stack, which is dangerous if its length turns out to be too large
for the stack. Presumably that is the problem.

Cheers,
Michael.



Bug#1051900: ohcount: aborts with segfaul or bus error 90% of the time on arm64

2023-09-13 Thread Antonio Terceiro
Package: ohcount
Version: 4.0.0-3
Severity: grave
Justification: renders package unusable
X-Debbugs-Cc: debian-...@lists.debian.org

Dear Maintainer,

ohcount segfaults (and sometimes aborts with a Bus error) on arm64,
almost 90% of the time. I tried this on an up to date arm64 Debian
testing against the hexchat source code, but it's also reproducible on
the ohcount source code itself. The same test, when performced on an up
to date amd64 Debian testing, finishes successfully 100% of the time.

For example this is a sample session with 10 runs against the source of
ohcount itself:

$ ohcount
Examining 1192 file(s)
Segmentation fault
$ ohcount
Examining 1192 file(s)
Bus error
$ ohcount
Examining 1192 file(s)
Bus error
$ ohcount
Examining 1192 file(s)
Bus error
$ ohcount
Examining 1192 file(s)
Bus error
$ ohcount
Examining 1192 file(s)
Segmentation fault
$ ohcount
Examining 1192 file(s)
Segmentation fault
$ ohcount
Examining 1192 file(s)
Segmentation fault
$ ohcount
Examining 1192 file(s)
Segmentation fault
$ ohcount
Examining 1192 file(s)
Bus error

-- System Information:
Debian Release: trixie/sid
  APT prefers testing
  APT policy: (900, 'testing'), (500, 'unstable'), (1, 'experimental')
Architecture: arm64 (aarch64)

Kernel: Linux 6.4.0-3-arm64 (SMP w/32 CPU threads)
Kernel taint flags: TAINT_UNSIGNED_MODULE
Locale: LANG=pt_BR.UTF-8, LC_CTYPE=pt_BR.UTF-8 (charmap=UTF-8), LANGUAGE not set
Shell: /bin/sh linked to /usr/bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

Versions of packages ohcount depends on:
ii  file   1:5.45-2
ii  libc6  2.37-7
ii  libmagic1  1:5.45-2
ii  libpcre3   2:8.39-15
ii  ruby   1:3.1
ii  ruby-diff-lcs  1.5.0-1

ohcount recommends no packages.

Versions of packages ohcount suggests:
pn  ohcount-doc  

-- no debconf information


signature.asc
Description: PGP signature