Hello,
I did some debugging with bouds-checking and came up with attached patch.
I seriously doubt any one did use bounds checking in a large project before.
Currently I can use this now in a large multi threaded project. It still
needs some more testing so do not apply the patch yet.
I disabled some errors. For example if a bounded pointer is not found I
give no error. I also relaxed printing free errors.
There were some off by 1 errors in lib/bcheck.c and I needed to make the
code thread safe.
I used the patch to not link in libtcc1.a in shared objects when bounds
checking so I have only one memory pool.
This has to be documented because you cannot use this with dlopen for
example.
I also added the pthread library when bounds checking so it is now multi
threaded.
I found another problem with nocode_wanted when using sizeof().
Also the push/pop trick needed to push some more registers when more
parameters are passed in registers.
I probably forget to mention a lot a other changes. See the patch.
I only tested this on linux x86_64. There are for sure problems on other
targets.
Regards,
Herman
On 2019-11-28 17:41, Michael Matz wrote:
Hello again,
but to maybe be a bit more constructive:
On Thu, 28 Nov 2019, Michael Matz wrote:
I fixed this with some push/pop trickery.
I see, yeah, expanding calls during calls is broken as gfunc_call in the
generators doesn't generally leave a trace in vtop[] which registers are
currently holding values. I think you only need so push/pop si/di, as
cx/dx aren't used intentionally during reg-param setup.
(I think i386-gen.c has a simila bug with fastcall functions).
This probably could be
improved. I have now added a minimum patch so bounds checking works a
little bit. We need still to fix the shared lib reloc problems and the
malloc/free hooks.
Do we? Can we perhaps also simply declare bounds checking to work only
with the main executable? Or remove that whole feature altogether?
And perhaps another compromise: only conditionally enable tracking of
locals: Invent a new cmdline option (say, '-bb'), which sets
do_bounds_checking to 2. And only if it's > 1 you would also track
locals, whereas with == 1 you would only track arrays and structs.
Your decision, I think you can push this patch either with that change, or
without (but try to remove cx/dx from the push/pop). It doesn't make tccs
source code larger or uglier in any meaningful way, but does fix practical
bugs.
Ciao,
Michael.
diff --git a/lib/bcheck.c b/lib/bcheck.c
index 90f0ad2..3637f43 100644
--- a/lib/bcheck.c
+++ b/lib/bcheck.c
@@ -28,6 +28,16 @@
&& !defined(__OpenBSD__) \
&& !defined(__NetBSD__)
#include <malloc.h>
+#include <errno.h>
+#include <semaphore.h>
+static sem_t bounds_sem;
+#define INIT_SEM() sem_init (&bounds_sem, 0, 1)
+#define WAIT_SEM() while (sem_wait (&bounds_sem) < 0 && errno ==
EINTR);
+#define POST_SEM() sem_post (&bounds_sem)
+#else
+#define INIT_SEM()
+#define WAIT_SEM()
+#define POST_SEM()
#endif
#if !defined(_WIN32)
@@ -65,7 +75,7 @@
#define BOUND_T1_BITS 13
#define BOUND_T2_BITS 11
#define BOUND_T3_BITS (sizeof(size_t)*8 - BOUND_T1_BITS - BOUND_T2_BITS)
-#define BOUND_E_BITS (sizeof(size_t))
+#define BOUND_E_BITS (sizeof(size_t) == 4 ? 4 : 5)
#define BOUND_T1_SIZE ((size_t)1 << BOUND_T1_BITS)
#define BOUND_T2_SIZE ((size_t)1 << BOUND_T2_BITS)
@@ -94,6 +104,10 @@ void __bound_init(void);
void __bound_new_region(void *p, size_t size);
int __bound_delete_region(void *p);
+/* debug */
+void bound_dump(void);
+void bound_check(void);
+
#ifdef __attribute__
/* an __attribute__ macro is defined in the system headers */
#undef __attribute__
@@ -141,7 +155,7 @@ static BoundEntry *__bound_find_region(BoundEntry *e1, void
*p)
while (e != NULL) {
addr = (size_t)p;
addr -= e->start;
- if (addr <= e->size) {
+ if (addr < e->size) {
/* put region at the head */
tmp = e1->start;
e1->start = e->start;
@@ -149,6 +163,9 @@ static BoundEntry *__bound_find_region(BoundEntry *e1, void
*p)
tmp = e1->size;
e1->size = e->size;
e->size = tmp;
+ tmp = e1->is_invalid;
+ e1->is_invalid = e->is_invalid;
+ e->is_invalid = tmp;
return e1;
}
e = e->next;
@@ -185,21 +202,24 @@ void * FASTCALL __bound_ptr_add(void *p, size_t offset)
__bound_init();
+ WAIT_SEM ();
e = __bound_t1[addr >> (BOUND_T2_BITS + BOUND_T3_BITS)];
e = (BoundEntry *)((char *)e +
((addr >> (BOUND_T3_BITS - BOUND_E_BITS)) &
((BOUND_T2_SIZE - 1) << BOUND_E_BITS)));
addr -= e->start;
- if (addr > e->size) {
+ if (addr >= e->size) {
e = __bound_find_region(e, p);
addr = (size_t)p - e->start;
}
addr += offset;
- if (addr >= e->size) {
+ if (e != __bound_invalid_t2 && addr > e->size) {
fprintf(stderr,"%s %s: %p is outside of the region\n",
__FILE__, __FUNCTION__, p + offset);
+ POST_SEM ();
return INVALID_POINTER; /* return an invalid pointer */
}
+ POST_SEM ();
return p + offset;
}
@@ -215,23 +235,26 @@ void * FASTCALL __bound_ptr_indir ## dsize (void *p,
size_t offset) \
__FILE__, __FUNCTION__, p, (unsigned)offset); \
\
__bound_init(); \
+ WAIT_SEM (); \
e = __bound_t1[addr >> (BOUND_T2_BITS + BOUND_T3_BITS)]; \
e = (BoundEntry *)((char *)e + \
((addr >> (BOUND_T3_BITS - BOUND_E_BITS)) & \
((BOUND_T2_SIZE - 1) << BOUND_E_BITS))); \
addr -= e->start; \
- if (addr > e->size) { \
+ if (addr >= e->size) { \
e = __bound_find_region(e, p); \
addr = (size_t)p - e->start; \
} \
addr += offset + dsize; \
- if (addr > e->size) { \
+ if (e != __bound_invalid_t2 && addr > e->size) { \
fprintf(stderr,"%s %s: %p is outside of the region\n", \
__FILE__, __FUNCTION__, p + offset); \
+ POST_SEM (); \
return INVALID_POINTER; /* return an invalid pointer */ \
} \
dprintf(stderr, "%s %s: return p+offset = %p\n", \
__FILE__, __FUNCTION__, p + offset); \
+ POST_SEM (); \
return p + offset; \
}
@@ -262,8 +285,8 @@ void FASTCALL __bound_local_new(void *p1)
{
size_t addr, size, fp, *p = p1;
- dprintf(stderr, "%s, %s start p1=%p\n", __FILE__, __FUNCTION__, p);
GET_CALLER_FP(fp);
+ dprintf(stderr, "%s, %s local new p1=%p fp=%p\n", __FILE__, __FUNCTION__,
p, fp);
for(;;) {
addr = p[0];
if (addr == 0)
@@ -281,6 +304,7 @@ void FASTCALL __bound_local_delete(void *p1)
{
size_t addr, fp, *p = p1;
GET_CALLER_FP(fp);
+ dprintf(stderr, "%s, %s local delete p1=%p fp=%p\n", __FILE__,
__FUNCTION__, p, fp);
for(;;) {
addr = p[0];
if (addr == 0)
@@ -317,13 +341,17 @@ static BoundEntry *__bound_new_page(void)
static BoundEntry *bound_new_entry(void)
{
BoundEntry *e;
- e = libc_malloc(sizeof(BoundEntry));
+ restore_malloc_hooks();
+ e = malloc(sizeof(BoundEntry));
+ install_malloc_hooks();
return e;
}
static void bound_free_entry(BoundEntry *e)
{
- libc_free(e);
+ restore_malloc_hooks();
+ free(e);
+ install_malloc_hooks();
}
static BoundEntry *get_page(size_t index)
@@ -345,6 +373,8 @@ static void mark_invalid(size_t addr, size_t size)
BoundEntry *page;
size_t t1_start, t1_end, i, j, t2_start, t2_end;
+ dprintf(stderr, "mark_invalid: start = %lx, size = %lx\n", (unsigned long)
addr, (unsigned long) size);
+
start = addr;
end = addr + size;
@@ -354,9 +384,7 @@ static void mark_invalid(size_t addr, size_t size)
else
t2_end = 1 << (BOUND_T1_BITS + BOUND_T2_BITS);
-#if 0
- dprintf(stderr, "mark_invalid: start = %x %x\n", t2_start, t2_end);
-#endif
+ dprintf(stderr, "mark_invalid: start = %lx, end = %lx\n", (unsigned long)
t2_start, (unsigned long) t2_end);
/* first we handle full pages */
t1_start = (t2_start + BOUND_T2_SIZE - 1) >> BOUND_T2_BITS;
@@ -408,6 +436,7 @@ void __bound_init(void)
dprintf(stderr, "%s, %s() start\n", __FILE__, __FUNCTION__);
/* save malloc hooks and install bound check hooks */
+ INIT_SEM ();
install_malloc_hooks();
#ifndef BOUND_STATIC
@@ -508,9 +537,11 @@ static inline void add_region(BoundEntry *e,
e1->start = e->start;
e1->size = e->size;
e1->next = e->next;
+ e1->is_invalid = e->is_invalid;
e->start = start;
e->size = size;
e->next = e1;
+ e1->is_invalid = 0;
}
}
@@ -526,8 +557,9 @@ void __bound_new_region(void *p, size_t size)
__bound_init();
+ WAIT_SEM ();
start = (size_t)p;
- end = start + size;
+ end = start + size - 1;
t1_start = start >> (BOUND_T2_BITS + BOUND_T3_BITS);
t1_end = end >> (BOUND_T2_BITS + BOUND_T3_BITS);
@@ -579,6 +611,7 @@ void __bound_new_region(void *p, size_t size)
}
add_region(e, start, size);
}
+ POST_SEM ();
dprintf(stderr, "%s, %s end\n", __FILE__, __FUNCTION__);
}
@@ -591,7 +624,7 @@ static inline void delete_region(BoundEntry *e, void *p,
size_t empty_size)
addr = (size_t)p;
addr -= e->start;
- if (addr <= e->size) {
+ if (addr < e->size) {
/* region found is first one */
e1 = e->next;
if (e1 == NULL) {
@@ -603,6 +636,7 @@ static inline void delete_region(BoundEntry *e, void *p,
size_t empty_size)
e->start = e1->start;
e->size = e1->size;
e->next = e1->next;
+ e->is_invalid = e1->is_invalid;
bound_free_entry(e1);
}
} else {
@@ -614,7 +648,7 @@ static inline void delete_region(BoundEntry *e, void *p,
size_t empty_size)
if (e == NULL)
break;
addr = (size_t)p - e->start;
- if (addr <= e->size) {
+ if (addr < e->size) {
/* found: remove entry */
e1->next = e->next;
bound_free_entry(e);
@@ -632,10 +666,11 @@ int __bound_delete_region(void *p)
BoundEntry *page, *e, *e2;
size_t t1_start, t1_end, t2_start, t2_end, i;
- dprintf(stderr, "%s %s() start\n", __FILE__, __FUNCTION__);
+ dprintf(stderr, "%s %s(%p) start\n", __FILE__, __FUNCTION__, p);
__bound_init();
+ WAIT_SEM ();
start = (size_t)p;
t1_start = start >> (BOUND_T2_BITS + BOUND_T3_BITS);
t2_start = (start >> (BOUND_T3_BITS - BOUND_E_BITS)) &
@@ -648,15 +683,18 @@ int __bound_delete_region(void *p)
if (addr > e->size)
e = __bound_find_region(e, p);
/* test if invalid region */
- if (e->size == EMPTY_SIZE || (size_t)p != e->start)
+ if (e->size == EMPTY_SIZE || (size_t)p != e->start) {
+ POST_SEM ();
return -1;
+ }
+
/* compute the size we put in invalid regions */
if (e->is_invalid)
empty_size = INVALID_SIZE;
else
empty_size = EMPTY_SIZE;
size = e->size;
- end = start + size;
+ end = start + size - 1;
/* now we can free each entry */
t1_end = end >> (BOUND_T2_BITS + BOUND_T3_BITS);
@@ -702,6 +740,7 @@ int __bound_delete_region(void *p)
}
delete_region(e, p, empty_size);
}
+ POST_SEM ();
dprintf(stderr, "%s %s() end\n", __FILE__, __FUNCTION__);
@@ -713,8 +752,10 @@ int __bound_delete_region(void *p)
static size_t get_region_size(void *p)
{
size_t addr = (size_t)p;
+ size_t size;
BoundEntry *e;
+ WAIT_SEM ();
e = __bound_t1[addr >> (BOUND_T2_BITS + BOUND_T3_BITS)];
e = (BoundEntry *)((char *)e +
((addr >> (BOUND_T3_BITS - BOUND_E_BITS)) &
@@ -723,8 +764,11 @@ static size_t get_region_size(void *p)
if (addr > e->size)
e = __bound_find_region(e, p);
if (e->start != (size_t)p)
- return EMPTY_SIZE;
- return e->size;
+ size = EMPTY_SIZE;
+ else
+ size = e->size;
+ POST_SEM ();
+ return size;
}
/* patched memory functions */
@@ -763,17 +807,21 @@ static void restore_malloc_hooks(void)
static void *libc_malloc(size_t size)
{
void *ptr;
+ WAIT_SEM ();
restore_malloc_hooks();
ptr = malloc(size);
install_malloc_hooks();
+ POST_SEM ();
return ptr;
}
static void libc_free(void *ptr)
{
+ WAIT_SEM ();
restore_malloc_hooks();
free(ptr);
install_malloc_hooks();
+ POST_SEM ();
}
/* XXX: we should use a malloc which ensure that it is unlikely that
@@ -802,6 +850,7 @@ void *__bound_memalign(size_t size, size_t align, const
void *caller)
{
void *ptr;
+ WAIT_SEM ();
restore_malloc_hooks();
#ifndef HAVE_MEMALIGN
@@ -820,6 +869,7 @@ void *__bound_memalign(size_t size, size_t align, const
void *caller)
#endif
install_malloc_hooks();
+ POST_SEM ();
if (!ptr)
return NULL;
@@ -836,7 +886,11 @@ void __bound_free(void *ptr, const void *caller)
if (ptr == NULL)
return;
if (__bound_delete_region(ptr) != 0)
- bound_error("freeing invalid region");
+#if 0 /* glibc thread code fails with this */
+ bound_error("freeing invalid region")
+#endif
+ ;
+
libc_free(ptr);
}
@@ -856,7 +910,7 @@ void *__bound_realloc(void *ptr, size_t size, const void
*caller)
old_size = get_region_size(ptr);
if (old_size == EMPTY_SIZE)
bound_error("realloc'ing invalid pointer");
- memcpy(ptr1, ptr, old_size);
+ memcpy(ptr1, ptr, old_size < size ? old_size : size);
__bound_free(ptr, caller);
return ptr1;
}
@@ -875,8 +929,7 @@ void *__bound_calloc(size_t nmemb, size_t size)
}
#endif
-#if 0
-static void bound_dump(void)
+void bound_dump(void)
{
BoundEntry *page, *e;
size_t i, j;
@@ -888,11 +941,15 @@ static void bound_dump(void)
e = page + j;
/* do not print invalid or empty entries */
if (e->size != EMPTY_SIZE && e->start != 0) {
- fprintf(stderr, "%08x:",
- (i << (BOUND_T2_BITS + BOUND_T3_BITS)) +
- (j << BOUND_T3_BITS));
+ fprintf(stderr, "%016lx:",
+ (unsigned long)
+ ((i << (BOUND_T2_BITS + BOUND_T3_BITS)) +
+ (j << BOUND_T3_BITS)));
do {
- fprintf(stderr, " %08lx:%08lx", e->start, e->start +
e->size);
+ fprintf(stderr, " %p:%p(%u)",
+ (void *) e->start,
+ (void *) (e->start + e->size),
+ (unsigned)e->is_invalid);
e = e->next;
} while (e != NULL);
fprintf(stderr, "\n");
@@ -900,7 +957,6 @@ static void bound_dump(void)
}
}
}
-#endif
/* some useful checked functions */
diff --git a/tccelf.c b/tccelf.c
index 70e4f87..c35d5fc 100644
--- a/tccelf.c
+++ b/tccelf.c
@@ -1366,7 +1366,12 @@ ST_FUNC void tcc_add_runtime(TCCState *s1)
tcc_add_dll(s1, TCC_LIBGCC, 0);
}
#endif
- tcc_add_support(s1, TCC_LIBTCC1);
+#ifdef CONFIG_TCC_BCHECK
+ if (s1->do_bounds_check)
+ tcc_add_library_err(s1, "pthread");
+ if (s1->do_bounds_check == 0 || s1->output_type != TCC_OUTPUT_DLL)
+#endif
+ tcc_add_support(s1, TCC_LIBTCC1);
/* add crt end if not memory output */
if (s1->output_type != TCC_OUTPUT_MEMORY)
tcc_add_crt(s1, "crtn.o");
@@ -2814,6 +2819,7 @@ static int tcc_load_alacarte(TCCState *s1, int fd, int
size, int entrysize)
const char *ar_names, *p;
const uint8_t *ar_index;
ElfW(Sym) *sym;
+ Section *s;
data = tcc_malloc(size);
if (full_read(fd, data, size) != size)
@@ -2825,9 +2831,14 @@ static int tcc_load_alacarte(TCCState *s1, int fd, int
size, int entrysize)
do {
bound = 0;
for(p = ar_names, i = 0; i < nsyms; i++, p += strlen(p)+1) {
- sym_index = find_elf_sym(symtab_section, p);
+ s = symtab_section;
+ sym_index = find_elf_sym(s, p);
+ if(sym_index == 0) {
+ s = s1->dynsymtab_section;
+ sym_index = find_elf_sym(s, p);
+ }
if(sym_index) {
- sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
+ sym = &((ElfW(Sym) *)s->data)[sym_index];
if(sym->st_shndx == SHN_UNDEF) {
off = (entrysize == 4
? get_be32(ar_index + i * 4)
diff --git a/tccgen.c b/tccgen.c
index a6181b0..aeb3bbd 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -1375,7 +1375,7 @@ static void gbound(void)
vtop->r &= ~VT_MUSTBOUND;
/* if lvalue, then use checking code before dereferencing */
- if (vtop->r & VT_LVAL) {
+ if ((vtop->r & VT_LVAL) && !nocode_wanted) {
/* if not VT_BOUNDED value, then make one */
if (!(vtop->r & VT_BOUNDED)) {
lval_type = vtop->r & (VT_LVAL_TYPE | VT_LVAL);
@@ -7413,7 +7413,8 @@ static void decl_initializer_alloc(CType *type,
AttributeDef *ad, int r,
if ((r & VT_VALMASK) == VT_LOCAL) {
sec = NULL;
#ifdef CONFIG_TCC_BCHECK
- if (bcheck && (type->t & VT_ARRAY)) {
+ if (bcheck && ((type->t & VT_ARRAY) ||
+ (type->t & VT_BTYPE) == VT_STRUCT)) {
loc--;
}
#endif
@@ -7422,8 +7423,9 @@ static void decl_initializer_alloc(CType *type,
AttributeDef *ad, int r,
#ifdef CONFIG_TCC_BCHECK
/* handles bounds */
/* XXX: currently, since we do only one pass, we cannot track
- '&' operators, so we add only arrays */
- if (bcheck && (type->t & VT_ARRAY)) {
+ '&' operators, so we add only arrays/structs/unions */
+ if (bcheck && ((type->t & VT_ARRAY) ||
+ (type->t & VT_BTYPE) == VT_STRUCT)) {
addr_t *bounds_ptr;
/* add padding between regions */
loc--;
diff --git a/x86_64-gen.c b/x86_64-gen.c
index cc66b60..4eb2d9d 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -641,11 +641,15 @@ static addr_t func_bound_offset;
static unsigned long func_bound_ind;
#endif
-static void gen_static_call(int v)
+static void gen_bounds_call(int v)
{
Sym *sym = external_global_sym(v, &func_old_type);
oad(0xe8, 0);
+#ifdef TCC_TARGET_PE
greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
+#else
+ greloca(cur_text_section, sym, ind-4, R_X86_64_PLT32, -4);
+#endif
}
/* generate a bounded pointer addition */
@@ -654,6 +658,10 @@ ST_FUNC void gen_bounded_ptr_add(void)
/* save all temporary registers */
save_regs(0);
+ o(0x51525657); /* push $rdi/%rsi/%rdx/%rcx */
+ o(0x51415041); /* push $r8/%r9 */
+ o(0x53415241); /* push $r10/%r11 */
+
/* prepare fast x86_64 function call */
gv(RC_RAX);
o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size
@@ -664,12 +672,15 @@ ST_FUNC void gen_bounded_ptr_add(void)
vtop--;
/* do a fast function call */
- gen_static_call(TOK___bound_ptr_add);
+ gen_bounds_call(TOK___bound_ptr_add);
/* returned pointer is in rax */
vtop++;
vtop->r = TREG_RAX | VT_BOUNDED;
+ o(0x5a415b41); /* pop $r11/%r10 */
+ o(0x58415941); /* pop $r9/%r8 */
+ o(0x5f5e5a59); /* pop $rcx/$rdx/$rsi/%rdi */
/* relocation offset of the bounding function call point */
vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
@@ -1574,7 +1585,8 @@ void gfunc_prolog(CType *func_type)
if (tcc_state->do_bounds_check) {
func_bound_offset = lbounds_section->data_offset;
func_bound_ind = ind;
- oad(0xb8, 0); /* lbound section pointer */
+ o(0xb848); /* lbound section pointer */
+ gen_le64 (0);
o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr
*/
oad(0xb8, 0); /* call to function */
}
@@ -1603,17 +1615,18 @@ void gfunc_epilog(void)
func_bound_offset,
lbounds_section->data_offset);
saved_ind = ind;
ind = func_bound_ind;
- greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0);
- ind = ind + 5 + 3;
- gen_static_call(TOK___bound_local_new);
+ greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
+ ind = ind + 10 + 3;
+ gen_bounds_call(TOK___bound_local_new);
ind = saved_ind;
/* generate bound check local freeing */
o(0x5250); /* save returned value, if any */
- greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0);
- oad(0xb8, 0); /* mov xxx, %rax */
+ greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
+ o(0xb848); /* mov xxx, %rax */
+ gen_le64 (0);
o(0xc78948); /* mov %rax,%rdi # first arg in %rdi, this must be ptr */
- gen_static_call(TOK___bound_local_delete);
+ gen_bounds_call(TOK___bound_local_delete);
o(0x585a); /* restore returned value, if any */
}
#endif
@@ -1940,6 +1953,7 @@ void gen_opf(int op)
v1.c.i = fc;
load(r, &v1);
fc = 0;
+ vtop->r = r = r | VT_LVAL;
}
if (op == TOK_EQ || op == TOK_NE) {
@@ -2007,6 +2021,7 @@ void gen_opf(int op)
v1.c.i = fc;
load(r, &v1);
fc = 0;
+ vtop->r = r = r | VT_LVAL;
}
assert(!(vtop[-1].r & VT_LVAL));
_______________________________________________
Tinycc-devel mailing list
Tinycc-devel@nongnu.org
https://lists.nongnu.org/mailman/listinfo/tinycc-devel