I think I have a working patch now. See attachment.
The code runs on i386 and x86_64 on linux for large projects I have.

I found some bugs when updating the i386 code.
The bugs are in the testcases in the patch.
The test/boundtest.c code works for both targets.
See below for the main changes.

I probably need some feedback now.

Regards,

    Herman

On 2019-12-02 22:46, Herman ten Brugge wrote:
Slightly updated patch. It still needs more work.

Regards,

    Herman

On 2019-12-02 18:24, Herman ten Brugge wrote:
Hello,

I did some debugging with bounds checking and came up with the attached patch. I seriously doubt anyone has used bounds checking in a large project before.

I can now use this in a large multi-threaded project. It still needs some more testing, so do not apply the patch yet.

I disabled some errors. For example, if a bounded pointer is not found I give no error. I also relaxed printing of free errors. There were some off-by-one errors in lib/bcheck.c and I needed to make the code thread-safe. I used the patch to not link in libtcc1.a in shared objects when bounds checking, so I have only one memory pool. This has to be documented because you cannot use this with dlopen, for example. I also added the pthread library when bounds checking, so it is now multi-threaded.
I found another problem with nocode_wanted when using sizeof().
Also the push/pop trick needed to push some more registers when more parameters are passed in registers.

I probably forgot to mention a lot of other changes. See the patch.

I only tested this on linux x86_64. There are for sure problems on other targets.

Regards,

    Herman


On 2019-11-28 17:41, Michael Matz wrote:
Hello again,

but to maybe be a bit more constructive:

On Thu, 28 Nov 2019, Michael Matz wrote:

I fixed this with some push/pop trickery.
I see, yeah, expanding calls during calls is broken as gfunc_call in the generators doesn't generally leave a trace in vtop[] which registers are
currently holding values.  I think you only need to push/pop si/di, as
cx/dx aren't used intentionally during reg-param setup.

(I think i386-gen.c has a similar bug with fastcall functions).

This probably could be
improved. I have now added a minimal patch so bounds checking works a
little bit. We still need to fix the shared lib reloc problems and the
malloc/free hooks.
Do we?  Can we perhaps also simply declare bounds checking to work only
with the main executable?  Or remove that whole feature altogether?
And perhaps another compromise: only conditionally enable tracking of
locals: Invent a new cmdline option (say, '-bb'), which sets
do_bounds_checking to 2.  And only if it's > 1 you would also track
locals, whereas with == 1 you would only track arrays and structs.

Your decision, I think you can push this patch either with that change, or without (but try to remove cx/dx from the push/pop).  It doesn't make tcc's source code larger or uglier in any meaningful way, but does fix practical
bugs.


Ciao,
Michael.



diff --git a/arm-gen.c b/arm-gen.c
index b93d298..2b220e7 100644
--- a/arm-gen.c
+++ b/arm-gen.c
@@ -1264,8 +1264,9 @@ void gfunc_call(int nb_args)
 }
 
 /* generate function prolog of type 't' */
-void gfunc_prolog(CType *func_type)
+void gfunc_prolog(Sym *func_sym)
 {
+  CType *func_type = &func_sym->type;
   Sym *sym,*sym2;
   int n, nf, size, align, rs, struct_ret = 0;
   int addr, pn, sn; /* pn=core, sn=stack */
diff --git a/arm64-gen.c b/arm64-gen.c
index 463541f..121108e 100644
--- a/arm64-gen.c
+++ b/arm64-gen.c
@@ -996,8 +996,9 @@ static int arm64_func_va_list_gr_offs;
 static int arm64_func_va_list_vr_offs;
 static int arm64_func_sub_sp_offset;
 
-ST_FUNC void gfunc_prolog(CType *func_type)
+ST_FUNC void gfunc_prolog(Sym *func_sym)
 {
+    CType *func_type = &func_sym->type;
     int n = 0;
     int i = 0;
     Sym *sym;
diff --git a/c67-gen.c b/c67-gen.c
index 880a572..c1e15ac 100644
--- a/c67-gen.c
+++ b/c67-gen.c
@@ -1939,8 +1939,9 @@ void gfunc_call(int nb_args)
 // parameters are loaded and restored upon return (or if/when needed).
 
 /* generate function prolog of type 't' */
-void gfunc_prolog(CType * func_type)
+void gfunc_prolog(Sym *func_sym)
 {
+    CType *func_type = &func_sym->type;
     int addr, align, size, func_call, i;
     Sym *sym;
     CType *type;
diff --git a/i386-gen.c b/i386-gen.c
index 51fbf07..9b1fdd5 100644
--- a/i386-gen.c
+++ b/i386-gen.c
@@ -512,10 +512,12 @@ ST_FUNC void gfunc_call(int nb_args)
 #endif
 
 /* generate function prolog of type 't' */
-ST_FUNC void gfunc_prolog(CType *func_type)
+ST_FUNC void gfunc_prolog(Sym *func_sym)
 {
+    CType *func_type = &func_sym->type;
     int addr, align, size, func_call, fastcall_nb_regs;
     int param_index, param_addr;
+    int n_arg = 0;
     uint8_t *fastcall_regs_ptr;
     Sym *sym;
     CType *type;
@@ -558,6 +560,7 @@ ST_FUNC void gfunc_prolog(CType *func_type)
     }
     /* define parameters */
     while ((sym = sym->next) != NULL) {
+        n_arg++;
         type = &sym->type;
         size = type_size(type, &align);
         size = (size + 3) & ~3;
@@ -597,6 +600,12 @@ ST_FUNC void gfunc_prolog(CType *func_type)
         func_bound_ind = ind;
         oad(0xb8, 0); /* lbound section pointer */
         oad(0xb8, 0); /* call to function */
+        if (n_arg >= 2 && strcmp (get_tok_str(func_sym->v, NULL), "main") == 
0) {
+            o(0x0c458b);  /* mov  0x12(%ebp),%eax */
+            o(0x50);      /* push %eax */
+            gen_static_call(TOK___bound_main_arg);
+            o(0x04c483);  /* add  $0x4,%esp */
+        }
     }
 #endif
 }
@@ -1003,6 +1012,7 @@ ST_FUNC void gen_cvt_itof(int t)
         o(0x2404db); /* fildl (%esp) */
         o(0x04c483); /* add $4, %esp */
     }
+    vtop->r2 = VT_CONST;
     vtop->r = TREG_ST0;
 }
 
diff --git a/lib/Makefile b/lib/Makefile
index 969d8e2..1750fa1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -19,6 +19,8 @@ XCFG = $(or $(findstring -win,$T),-unx)
 
 # in order to use gcc, tyoe: make <target>-libtcc1-usegcc=yes
 arm-libtcc1-usegcc ?= no
+x86_64-libtcc1-usegcc ?= no
+i386-libtcc1-usegcc ?= no
 
 ifeq "$($(T)-libtcc1-usegcc)" "yes"
  XCC = $(CC)
@@ -39,6 +41,8 @@ ifdef CONFIG_OSX
  XFLAGS += -D_ANSI_SOURCE
 endif
 
+XFLAGS += -g -Wno-deprecated-declarations
+
 I386_O = libtcc1.o alloca86.o alloca86-bt.o
 X86_64_O = libtcc1.o alloca86_64.o alloca86_64-bt.o
 ARM_O = libtcc1.o armeabi.o alloca-arm.o armflush.o
diff --git a/lib/bcheck.c b/lib/bcheck.c
index 90f0ad2..6854b4e 100644
--- a/lib/bcheck.c
+++ b/lib/bcheck.c
@@ -28,16 +28,28 @@
  && !defined(__OpenBSD__) \
  && !defined(__NetBSD__)
 #include <malloc.h>
+#include <errno.h>
+#include <semaphore.h>
+static sem_t bounds_sem;
+#define INIT_SEM()  sem_init (&bounds_sem, 0, 1)
+#define WAIT_SEM()  while (sem_wait (&bounds_sem) < 0 && errno == EINTR);
+#define POST_SEM()  sem_post (&bounds_sem)
+#define HAS_ENVIRON 1
+#else
+#define INIT_SEM()
+#define WAIT_SEM()
+#define POST_SEM()
+#define HAS_ENVIRON 0
 #endif
 
 #if !defined(_WIN32)
 #include <unistd.h>
 #endif
 
-/* #define BOUND_DEBUG */
+#define BOUND_DEBUG
 
 #ifdef BOUND_DEBUG
- #define dprintf(a...) fprintf(a)
+ #define dprintf(a...) if (print_calls) fprintf(a)
 #else
  #define dprintf(a...)
 #endif
@@ -65,7 +77,7 @@
 #define BOUND_T1_BITS 13
 #define BOUND_T2_BITS 11
 #define BOUND_T3_BITS (sizeof(size_t)*8 - BOUND_T1_BITS - BOUND_T2_BITS)
-#define BOUND_E_BITS  (sizeof(size_t))
+#define BOUND_E_BITS  (sizeof(size_t) == 4 ? 4 : 5)
 
 #define BOUND_T1_SIZE ((size_t)1 << BOUND_T1_BITS)
 #define BOUND_T2_SIZE ((size_t)1 << BOUND_T2_BITS)
@@ -94,6 +106,9 @@ void __bound_init(void);
 void __bound_new_region(void *p, size_t size);
 int __bound_delete_region(void *p);
 
+/* debug */
+void bound_dump(void);
+
 #ifdef __attribute__
   /* an __attribute__ macro is defined in the system headers */
   #undef __attribute__ 
@@ -132,6 +147,9 @@ static BoundEntry **__bound_t1; /* page table */
 static BoundEntry *__bound_empty_t2;   /* empty page, for unused pages */
 static BoundEntry *__bound_invalid_t2; /* invalid page, for invalid pointers */
 
+static int print_calls = 0;
+static int never_fatal = 0;
+
 static BoundEntry *__bound_find_region(BoundEntry *e1, void *p)
 {
     size_t addr, tmp;
@@ -141,7 +159,7 @@ static BoundEntry *__bound_find_region(BoundEntry *e1, void 
*p)
     while (e != NULL) {
         addr = (size_t)p;
         addr -= e->start;
-        if (addr <= e->size) {
+        if (addr < e->size) {
             /* put region at the head */
             tmp = e1->start;
             e1->start = e->start;
@@ -149,6 +167,30 @@ static BoundEntry *__bound_find_region(BoundEntry *e1, 
void *p)
             tmp = e1->size;
             e1->size = e->size;
             e->size = tmp;
+            tmp = e1->is_invalid;
+            e1->is_invalid = e->is_invalid;
+            e->is_invalid = tmp;
+            return e1;
+        }
+        e = e->next;
+    }
+    /* no entry found: return empty entry or invalid entry */
+    if (e1->is_invalid)
+        return __bound_invalid_t2;
+    else
+        return __bound_empty_t2;
+}
+
+static BoundEntry *__bound_find_region_end(BoundEntry *e1, void *p)
+{
+    size_t addr;
+    BoundEntry *e;
+
+    e = e1;
+    while (e != NULL) {
+        addr = (size_t)p;
+        addr -= e->start;
+        if (addr == e->size) {
             return e1;
         }
         e = e->next;
@@ -165,7 +207,8 @@ static void bound_error(const char *fmt, ...)
 {
     __bound_error_msg = fmt;
     fprintf(stderr,"%s %s: %s\n", __FILE__, __FUNCTION__, fmt);
-    *(void **)0 = 0; /* force a runtime error */
+    if (never_fatal == 0)
+        *(void **)0 = 0; /* force a runtime error */
 }
 
 static void bound_alloc_error(void)
@@ -179,27 +222,41 @@ void * FASTCALL __bound_ptr_add(void *p, size_t offset)
 {
     size_t addr = (size_t)p;
     BoundEntry *e;
+    BoundEntry *s;
 
-    dprintf(stderr, "%s %s: %p %x\n",
+    dprintf(stderr, "%s %s: %p 0x%x\n",
         __FILE__, __FUNCTION__, p, (unsigned)offset);
 
     __bound_init();
 
+    WAIT_SEM ();
     e = __bound_t1[addr >> (BOUND_T2_BITS + BOUND_T3_BITS)];
     e = (BoundEntry *)((char *)e + 
                        ((addr >> (BOUND_T3_BITS - BOUND_E_BITS)) & 
                         ((BOUND_T2_SIZE - 1) << BOUND_E_BITS)));
+    s = e;
     addr -= e->start;
-    if (addr > e->size) {
+    if (addr >= e->size) {
         e = __bound_find_region(e, p);
         addr = (size_t)p - e->start;
     }
     addr += offset;
-    if (addr >= e->size) {
-       fprintf(stderr,"%s %s: %p is outside of the region\n",
-            __FILE__, __FUNCTION__, p + offset);
-        return INVALID_POINTER; /* return an invalid pointer */
+    if (e->size == EMPTY_SIZE || addr >= e->size) {
+        if (e->size == EMPTY_SIZE || e->is_invalid) {
+           e = __bound_find_region_end(s, p);
+           addr = (size_t)p - e->start;
+           addr += offset;
+        }
+        if (!e->is_invalid && addr >= e->size) {
+            fprintf(stderr,"%s %s: %p is outside of the region\n",
+                    __FILE__, __FUNCTION__, p + offset);
+            if (never_fatal == 0) {
+                POST_SEM ();
+                return INVALID_POINTER; /* return an invalid pointer */
+            }
+        }
     }
+    POST_SEM ();
     return p + offset;
 }
 
@@ -210,28 +267,42 @@ void * FASTCALL __bound_ptr_indir ## dsize (void *p, 
size_t offset)     \
 {                                                                       \
     size_t addr = (size_t)p;                                            \
     BoundEntry *e;                                                      \
+    BoundEntry *s;                                                      \
                                                                         \
-    dprintf(stderr, "%s %s: %p %x start\n",                             \
-        __FILE__, __FUNCTION__, p, (unsigned)offset);                  \
-                                                                       \
-    __bound_init();                                                    \
+    dprintf(stderr, "%s %s: %p 0x%x start\n",                           \
+        __FILE__, __FUNCTION__, p, (unsigned)offset);                   \
+                                                                        \
+    __bound_init();                                                     \
+    WAIT_SEM ();                                                        \
     e = __bound_t1[addr >> (BOUND_T2_BITS + BOUND_T3_BITS)];            \
     e = (BoundEntry *)((char *)e +                                      \
                        ((addr >> (BOUND_T3_BITS - BOUND_E_BITS)) &      \
                         ((BOUND_T2_SIZE - 1) << BOUND_E_BITS)));        \
+    s = e;                                                              \
     addr -= e->start;                                                   \
-    if (addr > e->size) {                                               \
+    if (addr >= e->size) {                                              \
         e = __bound_find_region(e, p);                                  \
         addr = (size_t)p - e->start;                                    \
     }                                                                   \
     addr += offset + dsize;                                             \
-    if (addr > e->size) {                                               \
-       fprintf(stderr,"%s %s: %p is outside of the region\n",          \
-            __FILE__, __FUNCTION__, p + offset);                        \
-        return INVALID_POINTER; /* return an invalid pointer */         \
-    }                                                                  \
+    if (e->size == EMPTY_SIZE || addr > e->size) {                      \
+        if (e->size == EMPTY_SIZE || e->is_invalid) {                   \
+            e = __bound_find_region_end(s, p);                          \
+            addr = (size_t)p - e->start;                                \
+            addr += offset + dsize;                                     \
+        }                                                               \
+        if (!e->is_invalid && addr > e->size) {                         \
+            fprintf(stderr,"%s %s: %p is outside of the region\n",      \
+                __FILE__, __FUNCTION__, p + offset);                    \
+            if (never_fatal == 0) {                                     \
+                POST_SEM ();                                            \
+                return INVALID_POINTER; /* return an invalid pointer */ \
+            }                                                           \
+        }                                                               \
+    }                                                                   \
     dprintf(stderr, "%s %s: return p+offset = %p\n",                    \
         __FILE__, __FUNCTION__, p + offset);                            \
+    POST_SEM ();                                                        \
     return p + offset;                                                  \
 }
 
@@ -262,8 +333,8 @@ void FASTCALL __bound_local_new(void *p1)
 {
     size_t addr, size, fp, *p = p1;
 
-    dprintf(stderr, "%s, %s start p1=%p\n", __FILE__, __FUNCTION__, p);
     GET_CALLER_FP(fp);
+    dprintf(stderr, "%s, %s local new p1=%p fp=%p\n", __FILE__, __FUNCTION__, 
p, (void *)fp);
     for(;;) {
         addr = p[0];
         if (addr == 0)
@@ -281,6 +352,7 @@ void FASTCALL __bound_local_delete(void *p1)
 {
     size_t addr, fp, *p = p1;
     GET_CALLER_FP(fp);
+    dprintf(stderr, "%s, %s local delete p1=%p fp=%p\n", __FILE__, 
__FUNCTION__, p, (void *)fp);
     for(;;) {
         addr = p[0];
         if (addr == 0)
@@ -300,7 +372,9 @@ static BoundEntry *__bound_new_page(void)
     BoundEntry *page;
     size_t i;
 
-    page = libc_malloc(sizeof(BoundEntry) * BOUND_T2_SIZE);
+    restore_malloc_hooks();
+    page = malloc(sizeof(BoundEntry) * BOUND_T2_SIZE);
+    install_malloc_hooks();
     if (!page)
         bound_alloc_error();
     for(i=0;i<BOUND_T2_SIZE;i++) {
@@ -317,13 +391,17 @@ static BoundEntry *__bound_new_page(void)
 static BoundEntry *bound_new_entry(void)
 {
     BoundEntry *e;
-    e = libc_malloc(sizeof(BoundEntry));
+    restore_malloc_hooks();
+    e = malloc(sizeof(BoundEntry));
+    install_malloc_hooks();
     return e;
 }
 
 static void bound_free_entry(BoundEntry *e)
 {
-    libc_free(e);
+    restore_malloc_hooks();
+    free(e);
+    install_malloc_hooks();
 }
 
 static BoundEntry *get_page(size_t index)
@@ -345,6 +423,8 @@ static void mark_invalid(size_t addr, size_t size)
     BoundEntry *page;
     size_t t1_start, t1_end, i, j, t2_start, t2_end;
 
+    dprintf(stderr, "mark_invalid: start = %lx, size = %lx\n", (unsigned long) 
addr, (unsigned long) size);
+
     start = addr;
     end = addr + size;
 
@@ -354,9 +434,7 @@ static void mark_invalid(size_t addr, size_t size)
     else
         t2_end = 1 << (BOUND_T1_BITS + BOUND_T2_BITS);
 
-#if 0
-    dprintf(stderr, "mark_invalid: start = %x %x\n", t2_start, t2_end);
-#endif
+    dprintf(stderr, "mark_invalid: start = %lx, end = %lx\n", (unsigned long) 
t2_start, (unsigned long) t2_end);
     
     /* first we handle full pages */
     t1_start = (t2_start + BOUND_T2_SIZE - 1) >> BOUND_T2_BITS;
@@ -397,16 +475,20 @@ void __bound_init(void)
     size_t i;
     BoundEntry *page;
     size_t start, size;
-    size_t *p;
 
     static int inited;
     if (inited)
-       return;
+        return;
 
     inited = 1;
 
+    print_calls = getenv ("TCC_BOUNDS_PRINT_CALLS") != NULL;
+    never_fatal = getenv ("TCC_BOUNDS_NEVER_FATAL") != NULL;
+
     dprintf(stderr, "%s, %s() start\n", __FILE__, __FUNCTION__);
 
+    INIT_SEM ();
+
     /* save malloc hooks and install bound check hooks */
     install_malloc_hooks();
 
@@ -467,25 +549,44 @@ void __bound_init(void)
     mark_invalid(start, size);
 #endif
 
+    dprintf(stderr, "%s, %s() end\n\n", __FILE__, __FUNCTION__);
+}
+
+void __bounds_add_static_var (size_t *p)
+{
     /* add all static bound check values */
-    p = (size_t *)&__bounds_start;
     while (p[0] != 0) {
         __bound_new_region((void *)p[0], p[1]);
         p += 2;
     }
-
-    dprintf(stderr, "%s, %s() end\n\n", __FILE__, __FUNCTION__);
 }
 
-void __bound_main_arg(void **p)
+void __bound_main_arg(char **p)
 {
     void *start = p;
-    while (*p++);
+    while (*p) {
+        __bound_new_region(*p, strlen (*p) + 1);
+        p++;
+    }
 
-    dprintf(stderr, "%s, %s calling __bound_new_region(%p %x)\n",
+    dprintf(stderr, "%s, %s calling __bound_new_region(%p 0x%x)\n",
             __FILE__, __FUNCTION__, start, (unsigned)((void *)p - start));
 
     __bound_new_region(start, (void *) p - start);
+
+#if HAS_ENVIRON
+    {
+        extern char **environ;
+
+        p = environ;
+        start = p;
+        while (*p) {
+            __bound_new_region(*p, strlen (*p) + 1);
+            p++;
+        }
+        __bound_new_region(start, (void *) p - start);
+    }
+#endif
 }
 
 void __bound_exit(void)
@@ -502,15 +603,18 @@ static inline void add_region(BoundEntry *e,
         /* no region : add it */
         e->start = start;
         e->size = size;
+        e->is_invalid = 0;
     } else {
         /* already regions in the list: add it at the head */
         e1 = bound_new_entry();
         e1->start = e->start;
         e1->size = e->size;
         e1->next = e->next;
+        e1->is_invalid = e->is_invalid;
         e->start = start;
         e->size = size;
         e->next = e1;
+        e1->is_invalid = 0;
     }
 }
 
@@ -521,13 +625,14 @@ void __bound_new_region(void *p, size_t size)
     BoundEntry *page, *e, *e2;
     size_t t1_start, t1_end, i, t2_start, t2_end;
 
-    dprintf(stderr, "%s, %s(%p, %x) start\n",
+    dprintf(stderr, "%s, %s(%p, 0x%x) start\n",
         __FILE__, __FUNCTION__, p, (unsigned)size);
 
     __bound_init();
 
+    WAIT_SEM ();
     start = (size_t)p;
-    end = start + size;
+    end = start + size - 1;
     t1_start = start >> (BOUND_T2_BITS + BOUND_T3_BITS);
     t1_end = end >> (BOUND_T2_BITS + BOUND_T3_BITS);
 
@@ -579,6 +684,7 @@ void __bound_new_region(void *p, size_t size)
         }
         add_region(e, start, size);
     }
+    POST_SEM ();
 
     dprintf(stderr, "%s, %s end\n", __FILE__, __FUNCTION__);
 }
@@ -591,18 +697,20 @@ static inline void delete_region(BoundEntry *e, void *p, 
size_t empty_size)
 
     addr = (size_t)p;
     addr -= e->start;
-    if (addr <= e->size) {
+    if (addr < e->size) {
         /* region found is first one */
         e1 = e->next;
         if (e1 == NULL) {
             /* no more region: mark it empty */
             e->start = 0;
             e->size = empty_size;
+            e->is_invalid = empty_size == INVALID_SIZE;
         } else {
             /* copy next region in head */
             e->start = e1->start;
             e->size = e1->size;
             e->next = e1->next;
+            e->is_invalid = e1->is_invalid;
             bound_free_entry(e1);
         }
     } else {
@@ -614,7 +722,7 @@ static inline void delete_region(BoundEntry *e, void *p, 
size_t empty_size)
             if (e == NULL)
                 break;
             addr = (size_t)p - e->start;
-            if (addr <= e->size) {
+            if (addr < e->size) {
                 /* found: remove entry */
                 e1->next = e->next;
                 bound_free_entry(e);
@@ -632,10 +740,11 @@ int __bound_delete_region(void *p)
     BoundEntry *page, *e, *e2;
     size_t t1_start, t1_end, t2_start, t2_end, i;
 
-    dprintf(stderr, "%s %s() start\n", __FILE__, __FUNCTION__);
+    dprintf(stderr, "%s %s(%p) start\n", __FILE__, __FUNCTION__, p);
 
     __bound_init();
 
+    WAIT_SEM ();
     start = (size_t)p;
     t1_start = start >> (BOUND_T2_BITS + BOUND_T3_BITS);
     t2_start = (start >> (BOUND_T3_BITS - BOUND_E_BITS)) & 
@@ -648,15 +757,18 @@ int __bound_delete_region(void *p)
     if (addr > e->size)
         e = __bound_find_region(e, p);
     /* test if invalid region */
-    if (e->size == EMPTY_SIZE || (size_t)p != e->start) 
+    if (e->size == EMPTY_SIZE || (size_t)p != e->start) {
+        POST_SEM ();
         return -1;
+    }
+
     /* compute the size we put in invalid regions */
     if (e->is_invalid)
         empty_size = INVALID_SIZE;
     else
         empty_size = EMPTY_SIZE;
     size = e->size;
-    end = start + size;
+    end = start + size - 1;
 
     /* now we can free each entry */
     t1_end = end >> (BOUND_T2_BITS + BOUND_T3_BITS);
@@ -702,6 +814,7 @@ int __bound_delete_region(void *p)
         }
         delete_region(e, p, empty_size);
     }
+    POST_SEM ();
 
     dprintf(stderr, "%s %s() end\n", __FILE__, __FUNCTION__);
 
@@ -713,8 +826,10 @@ int __bound_delete_region(void *p)
 static size_t get_region_size(void *p)
 {
     size_t addr = (size_t)p;
+    size_t size;
     BoundEntry *e;
 
+    WAIT_SEM ();
     e = __bound_t1[addr >> (BOUND_T2_BITS + BOUND_T3_BITS)];
     e = (BoundEntry *)((char *)e + 
                        ((addr >> (BOUND_T3_BITS - BOUND_E_BITS)) & 
@@ -723,8 +838,11 @@ static size_t get_region_size(void *p)
     if (addr > e->size)
         e = __bound_find_region(e, p);
     if (e->start != (size_t)p)
-        return EMPTY_SIZE;
-    return e->size;
+        size = EMPTY_SIZE;
+    else
+        size = e->size;
+    POST_SEM ();
+    return size;
 }
 
 /* patched memory functions */
@@ -763,17 +881,21 @@ static void restore_malloc_hooks(void)
 static void *libc_malloc(size_t size)
 {
     void *ptr;
+    WAIT_SEM ();
     restore_malloc_hooks();
     ptr = malloc(size);
     install_malloc_hooks();
+    POST_SEM ();
     return ptr;
 }
 
 static void libc_free(void *ptr)
 {
+    WAIT_SEM ();
     restore_malloc_hooks();
     free(ptr);
     install_malloc_hooks();
+    POST_SEM ();
 }
 
 /* XXX: we should use a malloc which ensure that it is unlikely that
@@ -791,7 +913,7 @@ void *__bound_malloc(size_t size, const void *caller)
     if (!ptr)
         return NULL;
 
-    dprintf(stderr, "%s, %s calling __bound_new_region(%p, %x)\n",
+    dprintf(stderr, "%s, %s calling __bound_new_region(%p, 0x%x)\n",
            __FILE__, __FUNCTION__, ptr, (unsigned)size);
 
     __bound_new_region(ptr, size);
@@ -802,6 +924,7 @@ void *__bound_memalign(size_t size, size_t align, const 
void *caller)
 {
     void *ptr;
 
+    WAIT_SEM ();
     restore_malloc_hooks();
 
 #ifndef HAVE_MEMALIGN
@@ -820,11 +943,12 @@ void *__bound_memalign(size_t size, size_t align, const 
void *caller)
 #endif
     
     install_malloc_hooks();
+    POST_SEM ();
     
     if (!ptr)
         return NULL;
 
-    dprintf(stderr, "%s, %s calling __bound_new_region(%p, %x)\n",
+    dprintf(stderr, "%s, %s calling __bound_new_region(%p, 0x%x)\n",
            __FILE__, __FUNCTION__, ptr, (unsigned)size);
 
     __bound_new_region(ptr, size);
@@ -836,7 +960,11 @@ void __bound_free(void *ptr, const void *caller)
     if (ptr == NULL)
         return;
     if (__bound_delete_region(ptr) != 0)
-        bound_error("freeing invalid region");
+#if 0 /* glibc thread code fails with this */
+        bound_error("freeing invalid region")
+#endif
+        ;
+
 
     libc_free(ptr);
 }
@@ -856,7 +984,7 @@ void *__bound_realloc(void *ptr, size_t size, const void 
*caller)
         old_size = get_region_size(ptr);
         if (old_size == EMPTY_SIZE)
             bound_error("realloc'ing invalid pointer");
-        memcpy(ptr1, ptr, old_size);
+        memcpy(ptr1, ptr, old_size < size ? old_size : size);
         __bound_free(ptr, caller);
         return ptr1;
     }
@@ -875,8 +1003,7 @@ void *__bound_calloc(size_t nmemb, size_t size)
 }
 #endif
 
-#if 0
-static void bound_dump(void)
+void bound_dump(void)
 {
     BoundEntry *page, *e;
     size_t i, j;
@@ -888,11 +1015,15 @@ static void bound_dump(void)
             e = page + j;
             /* do not print invalid or empty entries */
             if (e->size != EMPTY_SIZE && e->start != 0) {
-                fprintf(stderr, "%08x:", 
-                       (i << (BOUND_T2_BITS + BOUND_T3_BITS)) + 
-                       (j << BOUND_T3_BITS));
+                fprintf(stderr, "%016lx:", 
+                        (unsigned long)
+                        ((i << (BOUND_T2_BITS + BOUND_T3_BITS)) + 
+                         (j << BOUND_T3_BITS)));
                 do {
-                    fprintf(stderr, " %08lx:%08lx", e->start, e->start + 
e->size);
+                    fprintf(stderr, " %p:%p(%u)",
+                            (void *) e->start,
+                            (void *) (e->start + e->size),
+                            (unsigned)e->is_invalid);
                     e = e->next;
                 } while (e != NULL);
                 fprintf(stderr, "\n");
@@ -900,7 +1031,6 @@ static void bound_dump(void)
         }
     }
 }
-#endif
 
 /* some useful checked functions */
 
@@ -918,7 +1048,7 @@ void *__bound_memcpy(void *dst, const void *src, size_t 
size)
 {
     void* p;
 
-    dprintf(stderr, "%s %s: start, dst=%p src=%p size=%x\n",
+    dprintf(stderr, "%s %s: start, dst=%p src=%p size=0x%x\n",
             __FILE__, __FUNCTION__, dst, src, (unsigned)size);
 
     __bound_check(dst, size);
diff --git a/libtcc.c b/libtcc.c
index db30223..6aa2afd 100644
--- a/libtcc.c
+++ b/libtcc.c
@@ -132,7 +132,7 @@ BOOL WINAPI DllMain (HINSTANCE hDll, DWORD dwReason, LPVOID 
lpReserved)
 
 /********************************************************/
 /* copy a string and truncate it. */
-ST_FUNC char *pstrcpy(char *buf, int buf_size, const char *s)
+ST_FUNC char *pstrcpy(char *buf, size_t buf_size, const char *s)
 {
     char *q, *q_end;
     int c;
@@ -152,9 +152,9 @@ ST_FUNC char *pstrcpy(char *buf, int buf_size, const char 
*s)
 }
 
 /* strcat and truncate. */
-ST_FUNC char *pstrcat(char *buf, int buf_size, const char *s)
+ST_FUNC char *pstrcat(char *buf, size_t buf_size, const char *s)
 {
-    int len;
+    size_t len;
     len = strlen(buf);
     if (len < buf_size)
         pstrcpy(buf + len, buf_size - len, s);
diff --git a/riscv64-gen.c b/riscv64-gen.c
index 163657c..2359a0e 100644
--- a/riscv64-gen.c
+++ b/riscv64-gen.c
@@ -614,8 +614,9 @@ ST_FUNC void gfunc_call(int nb_args)
 
 static int func_sub_sp_offset, num_va_regs, func_va_list_ofs;
 
-ST_FUNC void gfunc_prolog(CType *func_type)
+ST_FUNC void gfunc_prolog(Sym *func_sym)
 {
+    CType *func_type = &func_sym->type;
     int i, addr, align, size;
     int param_addr = 0;
     int areg[2];
diff --git a/tcc.h b/tcc.h
index 6c70f33..1382e8d 100644
--- a/tcc.h
+++ b/tcc.h
@@ -493,19 +493,19 @@ typedef struct Sym {
             int c; /* associated number or Elf symbol index */
             union {
                 int sym_scope; /* scope level for locals */
-               int jnext; /* next jump label */
+                int jnext; /* next jump label */
                 struct FuncAttr f; /* function attributes */
                 int auxtype; /* bitfield access type */
             };
         };
         long long enum_val; /* enum constant if IS_ENUM_VAL */
         int *d; /* define token stream */
-       struct Sym *ncl; /* next cleanup */
+        struct Sym *ncl; /* next cleanup */
     };
     CType type; /* associated type */
     union {
         struct Sym *next; /* next related symbol (for fields and anoms) */
-       struct Sym *cleanupstate; /* in defined labels */
+        struct Sym *cleanupstate; /* in defined labels */
         int asm_label; /* associated asm label */
     };
     struct Sym *prev; /* prev symbol in stack */
@@ -673,20 +673,20 @@ struct sym_attr {
 };
 
 struct TCCState {
-    int verbose; /* if true, display some information during compilation */
-    int nostdinc; /* if true, no standard headers are added */
-    int nostdlib; /* if true, no standard libraries are added */
-    int nocommon; /* if true, do not use common symbols for .bss data */
-    int static_link; /* if true, static linking is performed */
-    int rdynamic; /* if true, all symbols are exported */
-    int symbolic; /* if true, resolve symbols in the current module first */
-    int filetype; /* file type for compilation (NONE,C,ASM) */
-    int cversion; /* supported C ISO version, 199901 (the default), 201112, 
... */
+    unsigned char verbose; /* if true, display some information during 
compilation */
+    unsigned char nostdinc; /* if true, no standard headers are added */
+    unsigned char nostdlib; /* if true, no standard libraries are added */
+    unsigned char nocommon; /* if true, do not use common symbols for .bss 
data */
+    unsigned char static_link; /* if true, static linking is performed */
+    unsigned char rdynamic; /* if true, all symbols are exported */
+    unsigned char symbolic; /* if true, resolve symbols in the current module 
first */
+    unsigned char filetype; /* file type for compilation (NONE,C,ASM) */
+    unsigned int  cversion; /* supported C ISO version, 199901 (the default), 
201112, ... */
 
     char *tcc_lib_path; /* CONFIG_TCCDIR or -B option */
     char *soname; /* as specified on the command line (-soname) */
     char *rpath; /* as specified on the command line (-Wl,-rpath=) */
-    int enable_new_dtags; /* ditto, (-Wl,--enable-new-dtags) */
+    unsigned char enable_new_dtags; /* ditto, (-Wl,--enable-new-dtags) */
 
     /* output type, see TCC_OUTPUT_XXX */
     int output_type;
@@ -694,25 +694,25 @@ struct TCCState {
     int output_format;
 
     /* C language options */
-    int char_is_unsigned;
-    int leading_underscore;
-    int ms_extensions; /* allow nested named struct w/o identifier behave like unnamed */
-    int dollars_in_identifiers;        /* allows '$' char in identifiers */
-    int ms_bitfields; /* if true, emulate MS algorithm for aligning bitfields */
+    unsigned char char_is_unsigned;
+    unsigned char leading_underscore;
+    unsigned char ms_extensions; /* allow nested named struct w/o identifier behave like unnamed */
+    unsigned char dollars_in_identifiers;  /* allows '$' char in identifiers */
+    unsigned char ms_bitfields; /* if true, emulate MS algorithm for aligning bitfields */
 
     /* warning switches */
-    int warn_write_strings;
-    int warn_unsupported;
-    int warn_error;
-    int warn_none;
-    int warn_implicit_function_declaration;
-    int warn_gcc_compat;
+    unsigned char warn_write_strings;
+    unsigned char warn_unsupported;
+    unsigned char warn_error;
+    unsigned char warn_none;
+    unsigned char warn_implicit_function_declaration;
+    unsigned char warn_gcc_compat;
 
     /* compile with debug symbol (and use them if error during execution) */
-    int do_debug;
+    unsigned char do_debug;
 #ifdef CONFIG_TCC_BCHECK
     /* compile with built-in memory and bounds checker */
-    int do_bounds_check;
+    unsigned char do_bounds_check;
 #endif
 #ifdef TCC_TARGET_ARM
     enum float_abi float_abi; /* float ABI of the generated code*/
@@ -720,7 +720,7 @@ struct TCCState {
     int run_test; /* nth test to run with -dt -run */
 
     addr_t text_addr; /* address of text section */
-    int has_text_addr;
+    unsigned char has_text_addr;
 
     unsigned section_align; /* section alignment */
 
@@ -847,11 +847,11 @@ struct TCCState {
     int nb_files; /* number thereof */
     int nb_libraries; /* number of libs thereof */
     char *outfile; /* output filename */
-    int option_r; /* option -r */
-    int do_bench; /* option -bench */
+    unsigned char option_r; /* option -r */
+    unsigned char do_bench; /* option -bench */
     int gen_deps; /* option -MD  */
     char *deps_outfile; /* option -MF */
-    int option_pthread; /* -pthread option */
+    unsigned char option_pthread; /* -pthread option */
     int argc;
     char **argv;
 };
@@ -1120,8 +1120,8 @@ ST_DATA int tcc_ext;
 ST_DATA struct TCCState *tcc_state;
 
 /* public functions currently used by the tcc main function */
-ST_FUNC char *pstrcpy(char *buf, int buf_size, const char *s);
-ST_FUNC char *pstrcat(char *buf, int buf_size, const char *s);
+ST_FUNC char *pstrcpy(char *buf, size_t buf_size, const char *s);
+ST_FUNC char *pstrcat(char *buf, size_t buf_size, const char *s);
 ST_FUNC char *pstrncpy(char *out, const char *in, size_t num);
 PUB_FUNC char *tcc_basename(const char *name);
 PUB_FUNC char *tcc_fileextension (const char *name);
@@ -1518,7 +1518,7 @@ ST_FUNC void load(int r, SValue *sv);
 ST_FUNC void store(int r, SValue *v);
 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *align, int *regsize);
 ST_FUNC void gfunc_call(int nb_args);
-ST_FUNC void gfunc_prolog(CType *func_type);
+ST_FUNC void gfunc_prolog(Sym *func_sym);
 ST_FUNC void gfunc_epilog(void);
 ST_FUNC void gen_fill_nops(int);
 ST_FUNC int gjmp(int t);
diff --git a/tccelf.c b/tccelf.c
index 70e4f87..e2b9268 100644
--- a/tccelf.c
+++ b/tccelf.c
@@ -1322,14 +1322,15 @@ ST_FUNC void tcc_add_bcheck(TCCState *s1)
 #ifdef CONFIG_TCC_BCHECK
     addr_t *ptr;
     int sym_index;
+    int bsym_index;
 
     if (0 == s1->do_bounds_check)
         return;
     /* XXX: add an object file to do that */
     ptr = section_ptr_add(bounds_section, sizeof(*ptr));
     *ptr = 0;
-    set_elf_sym(symtab_section, 0, 0,
-                ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE), 0,
+    bsym_index = set_elf_sym(symtab_section, 0, 0,
+                ELFW(ST_INFO)(STB_LOCAL, STT_NOTYPE), 0,
                 bounds_section->sh_num, "__bounds_start");
     /* pull bcheck.o from libtcc1.a */
     sym_index = set_elf_sym(symtab_section, 0, 0,
@@ -1344,6 +1345,39 @@ ST_FUNC void tcc_add_bcheck(TCCState *s1)
         put_elf_reloc(symtab_section, init_section,
             init_section->data_offset - 4, R_386_PC32, sym_index);
             /* R_386_PC32 = R_X86_64_PC32 = 2 */
+#ifdef TCC_TARGET_I386
+        pinit = section_ptr_add(init_section, 6);
+        pinit[0] = 0xb8;       /* mov xx,%eax */
+        write32le(pinit + 1, 0);
+        pinit[5] = 0x50;        /* push %eax */
+        put_elf_reloc(symtab_section, init_section,
+                init_section->data_offset - 5, R_386_32, bsym_index);
+#else
+        pinit = section_ptr_add(init_section, 13);
+        pinit[0] = 0x48;       /* mov xx,%rax */
+        pinit[1] = 0xb8;
+        write64le(pinit + 2, 0);
+        pinit[10] = 0x48;      /* mov %rax,%rdi */
+        pinit[11] = 0x89;
+        pinit[12] = 0xc7;
+        put_elf_reloc(symtab_section, init_section,
+                init_section->data_offset - 11, R_X86_64_64, bsym_index);
+#endif
+        sym_index = set_elf_sym(symtab_section, 0, 0,
+                        ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE), 0,
+                        SHN_UNDEF, "__bounds_add_static_var");
+        pinit = section_ptr_add(init_section, 5);
+        pinit[0] = 0xe8;
+        write32le(pinit + 1, -4);
+        put_elf_reloc(symtab_section, init_section,
+            init_section->data_offset - 4, R_386_PC32, sym_index);
+                /* R_386_PC32 = R_X86_64_PC32 = 2 */
+#ifdef TCC_TARGET_I386
+        pinit = section_ptr_add(init_section, 3);
+        pinit[0] = 0x83;       /* add  $0x4,%esp */
+        pinit[1] = 0xc4;
+        pinit[2] = 0x04;
+#endif
     }
 #endif
 }
@@ -1366,7 +1400,12 @@ ST_FUNC void tcc_add_runtime(TCCState *s1)
                 tcc_add_dll(s1, TCC_LIBGCC, 0);
         }
 #endif
-        tcc_add_support(s1, TCC_LIBTCC1);
+#ifdef CONFIG_TCC_BCHECK
+       if (s1->do_bounds_check)
+            tcc_add_library_err(s1, "pthread");
+       if (s1->do_bounds_check == 0 || s1->output_type != TCC_OUTPUT_DLL)
+#endif
+            tcc_add_support(s1, TCC_LIBTCC1);
         /* add crt end if not memory output */
         if (s1->output_type != TCC_OUTPUT_MEMORY)
             tcc_add_crt(s1, "crtn.o");
@@ -2814,6 +2853,7 @@ static int tcc_load_alacarte(TCCState *s1, int fd, int size, int entrysize)
     const char *ar_names, *p;
     const uint8_t *ar_index;
     ElfW(Sym) *sym;
+    Section *s;
 
     data = tcc_malloc(size);
     if (full_read(fd, data, size) != size)
@@ -2825,9 +2865,14 @@ static int tcc_load_alacarte(TCCState *s1, int fd, int size, int entrysize)
     do {
         bound = 0;
         for(p = ar_names, i = 0; i < nsyms; i++, p += strlen(p)+1) {
-            sym_index = find_elf_sym(symtab_section, p);
+            s = symtab_section;
+            sym_index = find_elf_sym(s, p);
+            if(sym_index == 0) {
+                s = s1->dynsymtab_section;
+                sym_index = find_elf_sym(s, p);
+            }
             if(sym_index) {
-                sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
+                sym = &((ElfW(Sym) *)s->data)[sym_index];
                 if(sym->st_shndx == SHN_UNDEF) {
                     off = (entrysize == 4
                           ? get_be32(ar_index + i * 4)
diff --git a/tccgen.c b/tccgen.c
index a6181b0..fe2121b 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -1191,7 +1191,11 @@ ST_FUNC void save_reg_upstack(int r, int n)
                     type = &int_type;
 #endif
                 size = type_size(type, &align);
-                               l=get_temp_local_var(size,align);
+                if ((p->r2 & VT_VALMASK) < VT_CONST) {
+                   size *= 2;
+                   align *= 2;
+               }
+               l=get_temp_local_var(size,align);
                 sv.type.t = type->t;
                 sv.r = VT_LOCAL | VT_LVAL;
                 sv.c.i = l;
@@ -1375,7 +1379,7 @@ static void gbound(void)
 
     vtop->r &= ~VT_MUSTBOUND;
     /* if lvalue, then use checking code before dereferencing */
-    if (vtop->r & VT_LVAL) {
+    if ((vtop->r & VT_LVAL) && !nocode_wanted) {
         /* if not VT_BOUNDED value, then make one */
         if (!(vtop->r & VT_BOUNDED)) {
             lval_type = vtop->r & (VT_LVAL_TYPE | VT_LVAL);
@@ -7413,7 +7417,8 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r,
     if ((r & VT_VALMASK) == VT_LOCAL) {
         sec = NULL;
 #ifdef CONFIG_TCC_BCHECK
-        if (bcheck && (type->t & VT_ARRAY)) {
+        if (bcheck && ((type->t & VT_ARRAY) ||
+                      (type->t & VT_BTYPE) == VT_STRUCT)) {
             loc--;
         }
 #endif
@@ -7422,8 +7427,9 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r,
 #ifdef CONFIG_TCC_BCHECK
         /* handles bounds */
         /* XXX: currently, since we do only one pass, we cannot track
-           '&' operators, so we add only arrays */
-        if (bcheck && (type->t & VT_ARRAY)) {
+           '&' operators, so we add only arrays/structs/unions */
+        if (bcheck && ((type->t & VT_ARRAY) ||
+                      (type->t & VT_BTYPE) == VT_STRUCT)) {
             addr_t *bounds_ptr;
             /* add padding between regions */
             loc--;
@@ -7599,7 +7605,7 @@ static void gen_function(Sym *sym, AttributeDef *ad)
     /* push a dummy symbol to enable local sym storage */
     sym_push2(&local_stack, SYM_FIELD, 0, 0);
     local_scope = 1; /* for function parameters */
-    gfunc_prolog(&sym->type);
+    gfunc_prolog(sym);
     local_scope = 0;
     rsym = 0;
     clear_temp_local_var_list();
diff --git a/tests/Makefile b/tests/Makefile
index 9a4d123..ad8ab63 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -158,7 +158,8 @@ btest: boundtest.c
        @echo ------------ $@ ------------
        @for i in $(BOUNDS_OK); do \
           echo ; echo --- boundtest $$i ---; \
-          if $(TCC) -b -run $< $$i ; then \
+          $(TCC) -b $< -o boundtest; \
+          if ./boundtest $$i ; then \
               echo succeeded as expected; \
           else\
               echo Failed positive test $$i ; exit 1 ; \
@@ -166,8 +167,9 @@ btest: boundtest.c
        done ;\
        for i in $(BOUNDS_FAIL); do \
           echo ; echo --- boundtest $$i ---; \
-          if $(TCC) -b -run $< $$i ; then \
-              echo Failed negative test $$i ; exit 1 ;\
+          $(TCC) -b $< -o boundtest; \
+          if ./boundtest $$i ; then \
+              echo Failed negative test $$i ; exit 1 ; \
           else\
               echo failed as expected; \
           fi ;\
diff --git a/tests/boundtest.c b/tests/boundtest.c
index e5c3ff4..ec27bf2 100644
--- a/tests/boundtest.c
+++ b/tests/boundtest.c
@@ -253,10 +253,20 @@ int (*table_test[])(void) = {
 
 int main(int argc, char **argv)
 {
+    int i;
+    char *cp;
     int index;
     int (*ftest)(void);
     int index_max = sizeof(table_test)/sizeof(table_test[0]);
 
+    /* check bounds checking main arg */
+    for (i = 0; i < argc; i++) {
+        cp = argv[i];
+        while (*cp) {
+            cp++;
+        }
+    }
+
     if (argc < 2) {
         printf(
            "test TCC bound checking system\n"
diff --git a/tests/tcctest.c b/tests/tcctest.c
index e4645bb..db4789c 100644
--- a/tests/tcctest.c
+++ b/tests/tcctest.c
@@ -123,6 +123,7 @@ void math_cmp_test(void);
 void callsave_test(void);
 void builtin_frame_address_test(void);
 void attrib_test(void);
+void bounds_check1_test(void);
 
 int fib(int n);
 void num(int n);
@@ -771,6 +772,7 @@ int main(int argc, char **argv)
     if (via_volatile (42) != 42)
       printf ("via_volatile broken\n");
     attrib_test();
+    bounds_check1_test();
     return 0; 
 }
 
@@ -3884,9 +3886,11 @@ void builtin_frame_address_test(void)
 
     printf("str: %s\n", str);
 #ifndef __riscv
+#ifndef __BOUNDS_CHECKING_ON
     bfa1(str-fp0);
 #endif
 #endif
+#endif
 }
 
 char via_volatile (char i)
@@ -3961,3 +3965,18 @@ int force_get_order(unsigned long s)
 {
     return __get_order(s);
 }
+
+#define pv(m) printf(sizeof (s->m + 0) == 8 ? "%016lx\n" : "%02x\n", s->m)
+
+/* Test failed when using bounds checking */
+void bounds_check1_test (void)
+{
+    struct s {
+        int x;
+        long long y;
+    } _s, *s = &_s;
+    s->x = 10;
+    s->y = 20;
+    pv(x);
+    pv(y);
+}
diff --git a/x86_64-gen.c b/x86_64-gen.c
index cc66b60..677d8fb 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -641,11 +641,15 @@ static addr_t func_bound_offset;
 static unsigned long func_bound_ind;
 #endif
 
-static void gen_static_call(int v)
+static void gen_bounds_call(int v)
 {
     Sym *sym = external_global_sym(v, &func_old_type);
     oad(0xe8, 0);
+#ifdef TCC_TARGET_PE
     greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
+#else
+    greloca(cur_text_section, sym, ind-4, R_X86_64_PLT32, -4);
+#endif
 }
 
 /* generate a bounded pointer addition */
@@ -654,6 +658,10 @@ ST_FUNC void gen_bounded_ptr_add(void)
     /* save all temporary registers */
     save_regs(0);
 
+    o(0x51525657); /* push $rdi/%rsi/%rdx/%rcx */
+    o(0x51415041); /* push $r8/%r9 */
+    o(0x53415241); /* push $r10/%r11 */
+
     /* prepare fast x86_64 function call */
     gv(RC_RAX);
     o(0xc68948); // mov  %rax,%rsi ## second arg in %rsi, this must be size
@@ -664,12 +672,15 @@ ST_FUNC void gen_bounded_ptr_add(void)
     vtop--;
 
     /* do a fast function call */
-    gen_static_call(TOK___bound_ptr_add);
+    gen_bounds_call(TOK___bound_ptr_add);
 
     /* returned pointer is in rax */
     vtop++;
     vtop->r = TREG_RAX | VT_BOUNDED;
 
+    o(0x5a415b41); /* pop $r11/%r10 */
+    o(0x58415941); /* pop $r9/%r8 */
+    o(0x5f5e5a59); /* pop $rcx/$rdx/$rsi/%rdi */
 
     /* relocation offset of the bounding function call point */
     vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
@@ -935,8 +946,9 @@ void gfunc_call(int nb_args)
 #define FUNC_PROLOG_SIZE 11
 
 /* generate function prolog of type 't' */
-void gfunc_prolog(CType *func_type)
+void gfunc_prolog(Sym *func_sym)
 {
+    CType *func_type = &func_sym->type;
     int addr, reg_param_index, bt, size;
     Sym *sym;
     CType *type;
@@ -1430,11 +1442,13 @@ static void push_arg_reg(int i) {
 }
 
 /* generate function prolog of type 't' */
-void gfunc_prolog(CType *func_type)
+void gfunc_prolog(Sym *func_sym)
 {
+    CType *func_type = &func_sym->type;
     X86_64_Mode mode;
     int i, addr, align, size, reg_count;
     int param_addr = 0, reg_param_index, sse_param_index;
+    int n_arg = 0;
     Sym *sym;
     CType *type;
 
@@ -1518,6 +1532,7 @@ void gfunc_prolog(CType *func_type)
     }
     /* define parameters */
     while ((sym = sym->next) != NULL) {
+       n_arg++;
         type = &sym->type;
         mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
         switch (mode) {
@@ -1574,9 +1589,14 @@ void gfunc_prolog(CType *func_type)
     if (tcc_state->do_bounds_check) {
         func_bound_offset = lbounds_section->data_offset;
         func_bound_ind = ind;
-        oad(0xb8, 0); /* lbound section pointer */
+        o(0xb848); /* lbound section pointer */
+        gen_le64 (0);
       o(0xc78948);  /* mov  %rax,%rdi ## first arg in %rdi, this must be ptr */
        oad(0xb8, 0); /* call to function */
+       if (n_arg >= 2 && strcmp (get_tok_str(func_sym->v, NULL), "main") == 0) {
+           o(0xf07d8b48);  /* mov -0x10(%rbp),%rdi */
+            gen_bounds_call(TOK___bound_main_arg);
+       }
     }
 #endif
 }
@@ -1603,17 +1623,18 @@ void gfunc_epilog(void)
                                func_bound_offset, lbounds_section->data_offset);
         saved_ind = ind;
         ind = func_bound_ind;
-        greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0);
-        ind = ind + 5 + 3;
-        gen_static_call(TOK___bound_local_new);
+        greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
+        ind = ind + 10 + 3;
+        gen_bounds_call(TOK___bound_local_new);
         ind = saved_ind;
 
         /* generate bound check local freeing */
         o(0x5250); /* save returned value, if any */
-        greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0);
-        oad(0xb8, 0); /* mov xxx, %rax */
+        greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
+        o(0xb848); /* mov xxx, %rax */
+       gen_le64 (0);
         o(0xc78948);  /* mov %rax,%rdi # first arg in %rdi, this must be ptr */
-        gen_static_call(TOK___bound_local_delete);
+        gen_bounds_call(TOK___bound_local_delete);
         o(0x585a); /* restore returned value, if any */
     }
 #endif
@@ -1940,6 +1961,7 @@ void gen_opf(int op)
                 v1.c.i = fc;
                 load(r, &v1);
                 fc = 0;
+                vtop->r = r = r | VT_LVAL;
             }
 
             if (op == TOK_EQ || op == TOK_NE) {
@@ -2007,6 +2029,7 @@ void gen_opf(int op)
                 v1.c.i = fc;
                 load(r, &v1);
                 fc = 0;
+                vtop->r = r = r | VT_LVAL;
             }
             
             assert(!(vtop[-1].r & VT_LVAL));
diff --git a/tests/tests2/110_average.c b/tests/tests2/110_average.c
new file mode 100644
index 0000000..273b511
--- /dev/null
+++ b/tests/tests2/110_average.c
@@ -0,0 +1,27 @@
+#include <stdio.h>
+
+typedef struct
+{
+    double average;
+    int count;
+}
+stats_type;
+
+static void
+testc (stats_type *s, long long data)
+{
+    s->average = (s->average * s->count + data) / (s->count + 1);
+    s->count++;
+}
+
+int main (void)
+{
+    stats_type s;
+
+    s.average = 0;
+    s.count = 0;
+    testc (&s, 10);
+    testc (&s, 20);
+    printf ("%g %d\n", s.average, s.count);
+    return 0;
+}
diff --git a/tests/tests2/110_average.expect b/tests/tests2/110_average.expect
new file mode 100644
index 0000000..4955335
--- /dev/null
+++ b/tests/tests2/110_average.expect
@@ -0,0 +1 @@
+15 2
diff --git a/tests/tests2/111_conversion.c b/tests/tests2/111_conversion.c
new file mode 100644
index 0000000..c0815e1
--- /dev/null
+++ b/tests/tests2/111_conversion.c
@@ -0,0 +1,22 @@
+#include <stdio.h>
+
+union u {
+    unsigned long ul;
+    long double ld;
+};
+
+void
+conv (union u *p)
+{
+    p->ul = (unsigned int) p->ld;
+}
+
+int main (void)
+{
+    union u v;
+
+    v.ld = 42;
+    conv (&v);
+    printf ("%lu\n", v.ul);
+    return 0;
+}
diff --git a/tests/tests2/111_conversion.expect b/tests/tests2/111_conversion.expect
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/tests/tests2/111_conversion.expect
@@ -0,0 +1 @@
+42
_______________________________________________
Tinycc-devel mailing list
Tinycc-devel@nongnu.org
https://lists.nongnu.org/mailman/listinfo/tinycc-devel

Reply via email to