Here's a patch that makes TCC pass its tests on i386 under ASan and
UBSan without any errors or warnings. Does this seem reasonable?
Anyone object to this approach being extended to all architectures?
Edmund
diff --git a/i386-gen.c b/i386-gen.c
index 993293e..d202925 100644
--- a/i386-gen.c
+++ b/i386-gen.c
@@ -138,11 +138,10 @@ ST_FUNC void gen_le32(int c)
/* output a symbol and patch all calls to it */
ST_FUNC void gsym_addr(int t, int a)
{
- int n, *ptr;
while (t) {
- ptr = (int *)(cur_text_section->data + t);
- n = *ptr; /* next value */
- *ptr = a - t - 4;
+ unsigned char *ptr = cur_text_section->data + t;
+ uint32_t n = read32le(ptr); /* next value */
+ write32le(ptr, a - t - 4);
t = n;
}
}
@@ -165,7 +164,7 @@ ST_FUNC int oad(int c, int s)
ind1 = ind + 4;
if (ind1 > cur_text_section->data_allocated)
section_realloc(cur_text_section, ind1);
- *(int *)(cur_text_section->data + ind) = s;
+ write32le(cur_text_section->data + ind, s);
s = ind;
ind = ind1;
return s;
@@ -671,9 +670,7 @@ ST_FUNC void gjmp_addr(int a)
/* generate a test. set 'inv' to invert test. Stack entry is popped */
ST_FUNC int gtst(int inv, int t)
{
- int v, t1, *p;
-
- v = vtop->r & VT_VALMASK;
+ int v = vtop->r & VT_VALMASK;
if (v == VT_CMP) {
/* fast case : can jump directly since flags are set */
g(0x0f);
@@ -681,14 +678,15 @@ ST_FUNC int gtst(int inv, int t)
} else if (v == VT_JMP || v == VT_JMPI) {
/* && or || optimization */
if ((v & 1) == inv) {
+ uint32_t n1, n = vtop->c.i;
/* insert vtop->c jump list in t */
- t1 = vtop->c.i;
- p = &t1;
- while (*p != 0)
- p = (int *)(cur_text_section->data + *p);
- *p = t;
- vtop->c.i = t1;
- t = t1;
+ /* an empty list (0) must leave t alone: offset 0 is not a list link */
+ if (n) {
+ while ((n1 = read32le(cur_text_section->data + n)))
+ n = n1;
+ write32le(cur_text_section->data + n, t);
+ t = vtop->c.i;
+ }
} else {
t = gjmp(t);
gsym(vtop->c.i);
diff --git a/tcc.h b/tcc.h
index ff846fd..60c0e5e 100644
--- a/tcc.h
+++ b/tcc.h
@@ -131,6 +131,23 @@
#include "stab.h"
#include "libtcc.h"
+/* read a 32-bit little-endian value from the 4 bytes at p; byte-wise
+ access makes this valid for any alignment and any host byte order */
+static inline uint32_t read32le(const unsigned char *p)
+{
+ return (p[0] | (uint32_t)p[1] << 8 |
+ (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24);
+}
+
+/* store x as a 32-bit little-endian value into the 4 bytes at p */
+static inline void write32le(unsigned char *p, uint32_t x)
+{
+ p[0] = x & 255;
+ p[1] = x >> 8 & 255;
+ p[2] = x >> 16 & 255;
+ p[3] = x >> 24 & 255;
+}
+
/* parser debug */
/* #define PARSE_DEBUG */
/* preprocessor debug */
diff --git a/tccelf.c b/tccelf.c
index 60320be..daaedf2 100644
--- a/tccelf.c
+++ b/tccelf.c
@@ -528,7 +528,7 @@ ST_FUNC void relocate_section(TCCState *s1, Section *s)
qrel++;
}
}
- *(int *)ptr += val;
+ write32le(ptr, read32le(ptr) + val);
break;
case R_386_PC32:
if (s1->output_type == TCC_OUTPUT_DLL) {
@@ -541,24 +541,24 @@ ST_FUNC void relocate_section(TCCState *s1, Section *s)
break;
}
}
- *(int *)ptr += val - addr;
+ write32le(ptr, read32le(ptr) + val - addr);
break;
case R_386_PLT32:
- *(int *)ptr += val - addr;
+ write32le(ptr, read32le(ptr) + val - addr);
break;
case R_386_GLOB_DAT:
case R_386_JMP_SLOT:
- *(int *)ptr = val;
+ write32le(ptr, val);
break;
case R_386_GOTPC:
- *(int *)ptr += s1->got->sh_addr - addr;
+ write32le(ptr, read32le(ptr) + s1->got->sh_addr - addr);
break;
case R_386_GOTOFF:
- *(int *)ptr += val - s1->got->sh_addr;
+ write32le(ptr, read32le(ptr) + val - s1->got->sh_addr);
break;
case R_386_GOT32:
/* we load the got offset */
- *(int *)ptr += s1->sym_attrs[sym_index].got_offset;
+ write32le(ptr, read32le(ptr) + s1->sym_attrs[sym_index].got_offset);
break;
case R_386_16:
if (s1->output_format != TCC_OUTPUT_FORMAT_BINARY) {
@@ -1032,23 +1032,6 @@ static struct sym_attr *alloc_sym_attr(TCCState *s1, int index)
return &s1->sym_attrs[index];
}
-/* XXX: suppress that */
-static void put32(unsigned char *p, uint32_t val)
-{
- p[0] = val;
- p[1] = val >> 8;
- p[2] = val >> 16;
- p[3] = val >> 24;
-}
-
-#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_ARM) || \
- defined(TCC_TARGET_ARM64) || defined(TCC_TARGET_X86_64)
-static uint32_t get32(unsigned char *p)
-{
- return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
-}
-#endif
-
static void build_got(TCCState *s1)
{
unsigned char *ptr;
@@ -1061,19 +1044,19 @@ static void build_got(TCCState *s1)
ptr = section_ptr_add(s1->got, 3 * PTR_SIZE);
#if PTR_SIZE == 4
/* keep space for _DYNAMIC pointer, if present */
- put32(ptr, 0);
+ write32le(ptr, 0);
/* two dummy got entries */
- put32(ptr + 4, 0);
- put32(ptr + 8, 0);
+ write32le(ptr + 4, 0);
+ write32le(ptr + 8, 0);
#else
/* keep space for _DYNAMIC pointer, if present */
- put32(ptr, 0);
- put32(ptr + 4, 0);
+ write32le(ptr, 0);
+ write32le(ptr + 4, 0);
/* two dummy got entries */
- put32(ptr + 8, 0);
- put32(ptr + 12, 0);
- put32(ptr + 16, 0);
- put32(ptr + 20, 0);
+ write32le(ptr + 8, 0);
+ write32le(ptr + 12, 0);
+ write32le(ptr + 16, 0);
+ write32le(ptr + 20, 0);
#endif
}
@@ -1155,10 +1138,10 @@ static unsigned long put_got_entry(TCCState *s1,
p = section_ptr_add(plt, 16);
p[0] = 0xff; /* pushl got + PTR_SIZE */
p[1] = modrm + 0x10;
- put32(p + 2, PTR_SIZE);
+ write32le(p + 2, PTR_SIZE);
p[6] = 0xff; /* jmp *(got + PTR_SIZE * 2) */
p[7] = modrm;
- put32(p + 8, PTR_SIZE * 2);
+ write32le(p + 8, PTR_SIZE * 2);
}
/* The PLT slot refers to the relocation entry it needs
@@ -1169,16 +1152,16 @@ static unsigned long put_got_entry(TCCState *s1,
p = section_ptr_add(plt, 16);
p[0] = 0xff; /* jmp *(got + x) */
p[1] = modrm;
- put32(p + 2, s1->got->data_offset);
+ write32le(p + 2, s1->got->data_offset);
p[6] = 0x68; /* push $xxx */
#ifdef TCC_TARGET_X86_64
/* On x86-64, the relocation is referred to by _index_. */
- put32(p + 7, relofs / sizeof (ElfW_Rel));
+ write32le(p + 7, relofs / sizeof (ElfW_Rel));
#else
- put32(p + 7, relofs);
+ write32le(p + 7, relofs);
#endif
p[11] = 0xe9; /* jmp plt_start */
- put32(p + 12, -(plt->data_offset));
+ write32le(p + 12, -(plt->data_offset));
/* If this was an UNDEF symbol set the offset in the
dynsymtab to the PLT slot, so that PC32 relocs to it
@@ -1200,24 +1183,24 @@ static unsigned long put_got_entry(TCCState *s1,
if (plt->data_offset == 0) {
/* first plt entry */
p = section_ptr_add(plt, 16);
- put32(p, 0xe52de004); /* push {lr} */
- put32(p+4, 0xe59fe010); /* ldr lr, [pc, #16] */
- put32(p+8, 0xe08fe00e); /* add lr, pc, lr */
- put32(p+12, 0xe5bef008); /* ldr pc, [lr, #8]! */
+ write32le(p, 0xe52de004); /* push {lr} */
+ write32le(p+4, 0xe59fe010); /* ldr lr, [pc, #16] */
+ write32le(p+8, 0xe08fe00e); /* add lr, pc, lr */
+ write32le(p+12, 0xe5bef008); /* ldr pc, [lr, #8]! */
}
symattr->plt_offset = plt->data_offset;
if (symattr->plt_thumb_stub) {
p = section_ptr_add(plt, 20);
- put32(p, 0x4778); /* bx pc */
- put32(p+2, 0x46c0); /* nop */
+ write32le(p, 0x4778); /* bx pc */
+ write32le(p+2, 0x46c0); /* nop */
p += 4;
} else
p = section_ptr_add(plt, 16);
- put32(p, 0xe59fc004); /* ldr ip, [pc, #4] ; GOT entry offset */
- put32(p+4, 0xe08fc00c); /* add ip, pc, ip ; addr of GOT entry */
- put32(p+8, 0xe59cf000); /* ldr pc, [ip] ; jump to GOT entry */
- put32(p+12, s1->got->data_offset); /* GOT entry off once patched */
+ write32le(p, 0xe59fc004); /* ldr ip, [pc, #4] ; GOT entry offset */
+ write32le(p+4, 0xe08fc00c); /* add ip, pc, ip ; addr of GOT entry */
+ write32le(p+8, 0xe59cf000); /* ldr pc, [ip] ; jump to GOT entry */
+ write32le(p+12, s1->got->data_offset); /* GOT entry off once patched */
/* the symbol is modified so that it will be relocated to
the PLT */
@@ -1237,8 +1220,8 @@ static unsigned long put_got_entry(TCCState *s1,
section_ptr_add(plt, 32);
symattr->plt_offset = plt->data_offset;
p = section_ptr_add(plt, 16);
- put32(p, s1->got->data_offset);
- put32(p + 4, (uint64_t)s1->got->data_offset >> 32);
+ write32le(p, s1->got->data_offset);
+ write32le(p + 4, (uint64_t)s1->got->data_offset >> 32);
if (sym->st_shndx == SHN_UNDEF)
offset = plt->data_offset - 16;
@@ -1381,9 +1364,9 @@ ST_FUNC void build_got_entries(TCCState *s1)
text_section->data_offset + 4, R_ARM_JUMP24,
sym_index);
p = section_ptr_add(text_section, 8);
- put32(p, 0x4778); /* bx pc */
- put32(p+2, 0x46c0); /* nop */
- put32(p+4, 0xeafffffe); /* b $sym */
+ write32le(p, 0x4778); /* bx pc */
+ write32le(p+2, 0x46c0); /* nop */
+ write32le(p+4, 0xeafffffe); /* b $sym */
}
#elif defined(TCC_TARGET_ARM64)
//xx Other cases may be required here:
@@ -1595,7 +1578,7 @@ ST_FUNC void tcc_add_bcheck(TCCState *s1)
Section *init_section = find_section(s1, ".init");
unsigned char *pinit = section_ptr_add(init_section, 5);
pinit[0] = 0xe8;
- put32(pinit + 1, -4);
+ write32le(pinit + 1, -4);
put_elf_reloc(symtab_section, init_section,
init_section->data_offset - 4, R_386_PC32, sym_index);
}
@@ -1761,9 +1744,9 @@ ST_FUNC void fill_got_entry(TCCState *s1, ElfW_Rel *rel)
section_reserve(s1->got, offset + PTR_SIZE);
#ifdef TCC_TARGET_X86_64
/* only works for x86-64 */
- put32(s1->got->data + offset + 4, sym->st_value >> 32);
+ write32le(s1->got->data + offset + 4, sym->st_value >> 32);
#endif
- put32(s1->got->data + offset, sym->st_value & 0xffffffff);
+ write32le(s1->got->data + offset, sym->st_value & 0xffffffff);
}
/* Perform relocation to GOT or PLT entries */
@@ -1930,20 +1913,20 @@ ST_FUNC void relocate_plt(TCCState *s1)
p_end = p + s1->plt->data_offset;
if (p < p_end) {
#if defined(TCC_TARGET_I386)
- put32(p + 2, get32(p + 2) + s1->got->sh_addr);
- put32(p + 8, get32(p + 8) + s1->got->sh_addr);
+ write32le(p + 2, read32le(p + 2) + s1->got->sh_addr);
+ write32le(p + 8, read32le(p + 8) + s1->got->sh_addr);
p += 16;
while (p < p_end) {
- put32(p + 2, get32(p + 2) + s1->got->sh_addr);
+ write32le(p + 2, read32le(p + 2) + s1->got->sh_addr);
p += 16;
}
#elif defined(TCC_TARGET_X86_64)
int x = s1->got->sh_addr - s1->plt->sh_addr - 6;
- put32(p + 2, get32(p + 2) + x);
- put32(p + 8, get32(p + 8) + x - 6);
+ write32le(p + 2, read32le(p + 2) + x);
+ write32le(p + 8, read32le(p + 8) + x - 6);
p += 16;
while (p < p_end) {
- put32(p + 2, get32(p + 2) + x + s1->plt->data - p);
+ write32le(p + 2, read32le(p + 2) + x + s1->plt->data - p);
p += 16;
}
#elif defined(TCC_TARGET_ARM)
@@ -1951,9 +1934,9 @@ ST_FUNC void relocate_plt(TCCState *s1)
x=s1->got->sh_addr - s1->plt->sh_addr - 12;
p += 16;
while (p < p_end) {
- if (get32(p) == 0x46c04778) /* PLT Thumb stub present */
+ if (read32le(p) == 0x46c04778) /* PLT Thumb stub present */
p += 4;
- put32(p + 12, x + get32(p + 12) + s1->plt->data - p);
+ write32le(p + 12, x + read32le(p + 12) + s1->plt->data - p);
p += 16;
}
#elif defined(TCC_TARGET_ARM64)
@@ -1962,32 +1945,32 @@ ST_FUNC void relocate_plt(TCCState *s1)
uint64_t off = (got >> 12) - (plt >> 12);
if ((off + ((uint32_t)1 << 20)) >> 21)
tcc_error("Failed relocating PLT (off=0x%lx, got=0x%lx, plt=0x%lx)", off, got, plt);
- put32(p, 0xa9bf7bf0); // stp x16,x30,[sp,#-16]!
- put32(p + 4, (0x90000010 | // adrp x16,...
- (off & 0x1ffffc) << 3 | (off & 3) << 29));
- put32(p + 8, (0xf9400211 | // ldr x17,[x16,#...]
- (got & 0xff8) << 7));
- put32(p + 12, (0x91000210 | // add x16,x16,#...
- (got & 0xfff) << 10));
- put32(p + 16, 0xd61f0220); // br x17
- put32(p + 20, 0xd503201f); // nop
- put32(p + 24, 0xd503201f); // nop
- put32(p + 28, 0xd503201f); // nop
+ write32le(p, 0xa9bf7bf0); // stp x16,x30,[sp,#-16]!
+ write32le(p + 4, (0x90000010 | // adrp x16,...
+ (off & 0x1ffffc) << 3 | (off & 3) << 29));
+ write32le(p + 8, (0xf9400211 | // ldr x17,[x16,#...]
+ (got & 0xff8) << 7));
+ write32le(p + 12, (0x91000210 | // add x16,x16,#...
+ (got & 0xfff) << 10));
+ write32le(p + 16, 0xd61f0220); // br x17
+ write32le(p + 20, 0xd503201f); // nop
+ write32le(p + 24, 0xd503201f); // nop
+ write32le(p + 28, 0xd503201f); // nop
p += 32;
while (p < p_end) {
uint64_t pc = plt + (p - s1->plt->data);
uint64_t addr = got +
- (get32(p) | (uint64_t)get32(p + 4) << 32);
+ (read32le(p) | (uint64_t)read32le(p + 4) << 32);
uint32_t off = (addr >> 12) - (pc >> 12);
if ((off + ((uint32_t)1 << 20)) >> 21)
tcc_error("Failed relocating PLT (off=0x%lx, addr=0x%lx, pc=0x%lx)", off, addr, pc);
- put32(p, (0x90000010 | // adrp x16,...
- (off & 0x1ffffc) << 3 | (off & 3) << 29));
- put32(p + 4, (0xf9400211 | // ldr x17,[x16,#...]
- (addr & 0xff8) << 7));
- put32(p + 8, (0x91000210 | // add x16,x16,#...
- (addr & 0xfff) << 10));
- put32(p + 12, 0xd61f0220); // br x17
+ write32le(p, (0x90000010 | // adrp x16,...
+ (off & 0x1ffffc) << 3 | (off & 3) << 29));
+ write32le(p + 4, (0xf9400211 | // ldr x17,[x16,#...]
+ (addr & 0xff8) << 7));
+ write32le(p + 8, (0x91000210 | // add x16,x16,#...
+ (addr & 0xfff) << 10));
+ write32le(p + 12, 0xd61f0220); // br x17
p += 16;
}
#elif defined(TCC_TARGET_C67)
@@ -2654,7 +2637,7 @@ static int elf_output_file(TCCState *s1, const char *filename)
fill_dynamic(s1, &dyninf);
/* put in GOT the dynamic section address and relocate PLT */
- put32(s1->got->data, dynamic->sh_addr);
+ write32le(s1->got->data, dynamic->sh_addr);
if (file_type == TCC_OUTPUT_EXE
#if defined(TCC_OUTPUT_DLL_WITH_PLT)
|| file_type == TCC_OUTPUT_DLL
_______________________________________________
Tinycc-devel mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/tinycc-devel