I noticed that in some places a single instruction's bytes were being emitted through multiple o() calls because of the 4-byte limit of its int-sized parameter, so I widened the parameter of o() to uint64_t.
I added and refactored some comments where needed as well. The repo link is https://github.com/usesc/tinycc --- tcc.h | 2 +- x86_64-gen.c | 159 ++++++++++++++++++++++++++++----------------------- 2 files changed, 90 insertions(+), 71 deletions(-) diff --git a/tcc.h b/tcc.h index 1c2f6949..87c6f4ec 100644 --- a/tcc.h +++ b/tcc.h @@ -1637,7 +1637,7 @@ ST_FUNC void gen_cvt_itof(int t); ST_FUNC void gen_cvt_ftof(int t); ST_FUNC void ggoto(void); #ifndef TCC_TARGET_C67 -ST_FUNC void o(unsigned int c); +ST_FUNC void o(uint64_t c); #endif ST_FUNC void gen_vla_sp_save(int addr); ST_FUNC void gen_vla_sp_restore(int addr); diff --git a/x86_64-gen.c b/x86_64-gen.c index 0e63e685..f853a40f 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -122,7 +122,7 @@ ST_DATA const char * const target_machine_defs = "__x86_64__\0" "__x86_64\0" "__amd64__\0" - ; +; ST_DATA const int reg_classes[NB_REGS] = { /* eax */ RC_INT | RC_RAX, @@ -181,7 +181,7 @@ ST_FUNC void g(int c) ind = ind1; } -ST_FUNC void o(unsigned int c) +ST_FUNC void o(uint64_t c) { while (c) { g(c); @@ -189,6 +189,9 @@ ST_FUNC void o(unsigned int c) } } +/* generate little-endian functions */ +/* mostly for immediates and displacements */ + ST_FUNC void gen_le16(int v) { g(v); @@ -256,6 +259,17 @@ static int oad(int c, int s) return t; } +static int oaw(int c, int s) +{ + int t; + if (nocode_wanted) + return s; + o(c); + t = ind; + gen_le16(s); + return t; +} + /* generate jmp to a label */ #define gjmp2(instr,lbl) oad(instr,lbl) @@ -550,10 +564,12 @@ void load(int r, SValue *sv) } else if (r == TREG_ST0) { assert((v >= TREG_XMM0) && (v <= TREG_XMM7)); /* gen_cvt_ftof(VT_LDOUBLE); */ + /* movsd %xmmN,-0x10(%rsp) */ o(0x110ff2); o(0x44 + REG_VALUE(r)*8); /* %xmmN */ o(0xf024); + o(0xf02444dd); /* fldl -0x10(%rsp) */ } else { orex(is64_type(ft), r, v, 0x89); @@ -584,8 +600,7 @@ void store(int r, SValue *v) if (fr == VT_CONST && (v->r & VT_SYM) && !(v->sym->type.t & VT_STATIC)) { - /* mov xx(%rip), %r11 */ - o(0x1d8b4c); + 
o(0x1d8b4c); /* mov xx(%rip), %r11 */ gen_gotpcrel(TREG_R11, v->sym, v->c.i); pic = is64_type(bt) ? 0x49 : 0x41; } @@ -626,7 +641,7 @@ void store(int r, SValue *v) } else if (op64) { if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) { gen_modrm64(op64, r, v->r, v->sym, fc); - } else if (fr != r) { + } else if (fr != r) { orex(1, fr, r, op64); o(0xc0 + fr + r * 8); /* mov r, fr */ } @@ -652,8 +667,7 @@ static void gcall_or_jmp(int is_jmp) /* otherwise, indirect call */ r = TREG_R11; load(r, vtop); - o(0x41); /* REX */ - o(0xff); /* call/jmp *r */ + o(0xff41); /* CALL/JMP *r */ o(0xd0 + REG_VALUE(r) + (is_jmp << 4)); } } @@ -712,20 +726,20 @@ static void gen_bounds_epilog(void) } /* generate bound check local freeing */ - o(0x5250); /* save returned value, if any */ - o(0x20ec8348); /* sub $32,%rsp */ - o(0x290f); /* movaps %xmm0,0x10(%rsp) */ - o(0x102444); - o(0x240c290f); /* movaps %xmm1,(%rsp) */ + o(0x5250); /* push %rax; push %rdx; save returned value, if any */ + o(0x20ec8348); /* sub $32,%rsp */ + o(0x102444290f); /* movaps %xmm0,0x10(%rsp) */ + o(0x240c290f); /* movaps %xmm1,(%rsp) */ + greloca(cur_text_section, sym_data, ind + 3, R_X86_64_PC32, -4); o(0x0d8d48 + ((TREG_FASTCALL_1 == TREG_RDI) * 0x300000)); /* lea xxx(%rip), %rcx/rdi */ gen_le32 (0); gen_bounds_call(TOK___bound_local_delete); - o(0x280f); /* movaps 0x10(%rsp),%xmm0 */ - o(0x102444); - o(0x240c280f); /* movaps (%rsp),%xmm1 */ - o(0x20c48348); /* add $32,%rsp */ - o(0x585a); /* restore returned value, if any */ + + o(0x102444280f); /* movaps 0x10(%rsp),%xmm0 */ + o(0x240c280f); /* movaps (%rsp),%xmm1 */ + o(0x20c48348); /* add $32,%rsp */ + o(0x585a); /* pop %rdx; pop %rax; restore returned value, if any */ } #endif @@ -1042,8 +1056,8 @@ void gfunc_epilog(void) greloca(cur_text_section, sym, ind-4, R_X86_64_PLT32, -4); o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */ } else { - o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ - o(0xec8148); /* sub rsp, stacksize */ + o(0xe5894855); /* 
push %rbp; mov %rsp, %rbp */ + o(0xec8148); /* sub rsp, stacksize */ gen_le32(v); } ind = cur_text_section->data_offset; @@ -1301,7 +1315,7 @@ void gfunc_call(int nb_args) (stack grows down), so the adjustment needs to happen _after_ an argument that requires it. */ if (stack_adjust) { - o(0x50); /* push %rax; aka sub $8,%rsp */ + o(0x50); /* push %rax; AKA sub $8,%rsp */ args_size += 8; stack_adjust = 0; } @@ -1314,8 +1328,8 @@ void gfunc_call(int nb_args) switch (vtop->type.t & VT_BTYPE) { case VT_STRUCT: /* allocate the necessary size on stack */ - o(0x48); - oad(0xec81, size); /* sub $xxx, %rsp */ + oad(0xEC8148, size); /* sub $xxx, %rsp */ + /* generate structure store */ r = get_reg(RC_INT); orex(1, r, 0, 0x89); /* mov %rsp, r */ @@ -1334,21 +1348,21 @@ void gfunc_call(int nb_args) case VT_LDOUBLE: gv(RC_ST0); - oad(0xec8148, size); /* sub $xxx, %rsp */ - o(0x7cdb); /* fstpt 0(%rsp) */ - g(0x24); - g(0x00); + oad(0xec8148, size); /* sub $xxx, %rsp */ + gen_le32(0x00247cdb); /* fstpt 0(%rsp) */ break; case VT_FLOAT: case VT_DOUBLE: assert(mode == x86_64_mode_sse); r = gv(RC_FLOAT); - o(0x50); /* push $rax */ + o(0x50); /* push %rax */ + /* movq %xmmN, (%rsp) */ o(0xd60f66); o(0x04 + REG_VALUE(r)*8); o(0x24); + break; default: @@ -1420,9 +1434,8 @@ void gfunc_call(int nb_args) /* Copy R10 and R11 into RDX and RCX, respectively */ if (nb_reg_args > 2) { o(0xd2894c); /* mov %r10, %rdx */ - if (nb_reg_args > 3) { + if (nb_reg_args > 3) o(0xd9894c); /* mov %r11, %rcx */ - } } if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */ @@ -1491,19 +1504,15 @@ void gfunc_prolog(Sym *func_sym) loc -= 24; /* movl $0x????????, -0x18(%rbp) */ - o(0xe845c7); - gen_le32(seen_reg_num * 8); + oad(0xe845c7, seen_reg_num * 8); /* movl $0x????????, -0x14(%rbp) */ - o(0xec45c7); - gen_le32(seen_sse_num * 16 + 48); + oad(0xec45c7, seen_sse_num * 16 + 48); /* leaq $0x????????, %r11 */ - o(0x9d8d4c); - gen_le32(seen_stack_size); + oad(0x9d8d4c, 
seen_stack_size); /* movq %r11, -0x10(%rbp) */ o(0xf05d894c); /* leaq $-200(%rbp), %r11 */ - o(0x9d8d4c); - gen_le32(-176 - 24); + oad(0x9d8d4c, -176 - 24); /* movq %r11, -0x8(%rbp) */ o(0xf85d894c); @@ -1515,9 +1524,8 @@ void gfunc_prolog(Sym *func_sym) gen_modrm(7 - i, VT_LOCAL, NULL, loc); } /* movq $0, loc+8(%rbp) */ - o(0x85c748); - gen_le32(loc + 8); - gen_le32(0); + oad(0x85c748, loc + 8); /* opcode + disp */ + gen_le32(0); /* immediate */ } for (i = 0; i < REGN; i++) { push_arg_reg(REGN-1-i); @@ -1606,17 +1614,15 @@ void gfunc_epilog(void) if (func_ret_sub == 0) { o(0xc3); /* ret */ } else { - o(0xc2); /* ret n */ - g(func_ret_sub); - g(func_ret_sub >> 8); + /* ret n */ + oaw(0xc2, func_ret_sub); } /* align local size to word & save local variables */ v = (-loc + 15) & -16; saved_ind = ind; ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; - o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ - o(0xec8148); /* sub rsp, stacksize */ - gen_le32(v); + o(0xe5894855); /* push %rbp; mov %rsp, %rbp */ + oad(0xec8148, v); /* sub rsp, stacksize */ ind = saved_ind; } @@ -1799,7 +1805,7 @@ void gen_opi(int op) vtop--; save_reg(TREG_RDX); orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */ - orex(ll, fr, 0, 0xf7); /* div fr, %eax */ + orex(ll, fr, 0, 0xf7); /* div fr, %eax */ o((uu ? 
0xf0 : 0xf8) + REG_VALUE(fr)); if (op == '%' || op == TOK_UMOD) r = TREG_RDX; @@ -1876,8 +1882,8 @@ void gen_opf(int op) o(0xc9d9); /* fxch %st(1) */ if (op == TOK_EQ || op == TOK_NE) o(0xe9da); /* fucompp */ - else - o(0xd9de); /* fcompp */ + else + o(0xd9de); /* fcompp */ o(0xe0df); /* fnstsw %ax */ if (op == TOK_EQ) { o(0x45e480); /* and $0x45, %ah */ @@ -1922,7 +1928,7 @@ void gen_opf(int op) ft = vtop->type.t; fc = vtop->c.i; o(0xde); /* fxxxp %st, %st(1) */ - o(0xc1 + (a << 3)); + o(0xc1 + (a << 3)); /* Shift operation */ vtop--; } } else { @@ -2050,20 +2056,23 @@ void gen_cvt_itof(int t) /* signed long long to float/double/long double (unsigned case is handled generically) */ o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ - o(0x242cdf); /* fildll (%rsp) */ + o(0x242cdf); /* fildll (%rsp) */ o(0x08c48348); /* add $8, %rsp */ } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED)) { /* unsigned int to float/double/long double */ - o(0x6a); /* push $0 */ + + /* push $0 */ + o(0x6a); g(0x00); + o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ - o(0x242cdf); /* fildll (%rsp) */ + o(0x242cdf); /* fildll (%rsp) */ o(0x10c48348); /* add $16, %rsp */ } else { /* int to float/double/long double */ o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ - o(0x2404db); /* fildl (%rsp) */ + o(0x2404db); /* fildl (%rsp) */ o(0x08c48348); /* add $8, %rsp */ } vtop->r = TREG_ST0; @@ -2076,8 +2085,11 @@ void gen_cvt_itof(int t) (vtop->type.t & VT_BTYPE) == VT_LLONG) { o(0x48); /* REX */ } + + /* cvtsi2sd */ o(0x2a0f); - o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */ + o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); + vtop->r = r; } } @@ -2094,32 +2106,38 @@ void gen_cvt_ftof(int t) if (bt == VT_FLOAT) { gv(RC_FLOAT); if (tbt == VT_DOUBLE) { - o(0x140f); /* unpcklps */ + /* unpcklps */ + o(0x140f); o(0xc0 + REG_VALUE(vtop->r)*9); - o(0x5a0f); /* cvtps2pd */ + /* cvtps2pd */ + o(0x5a0f); o(0xc0 + REG_VALUE(vtop->r)*9); } else if (tbt == 
VT_LDOUBLE) { save_reg(RC_ST0); /* movss %xmm0,-0x10(%rsp) */ o(0x110ff3); o(0x44 + REG_VALUE(vtop->r)*8); - o(0xf024); + o(0xf024); /* [rsp - 10] */ + o(0xf02444d9); /* flds -0x10(%rsp) */ vtop->r = TREG_ST0; } } else if (bt == VT_DOUBLE) { gv(RC_FLOAT); if (tbt == VT_FLOAT) { - o(0x140f66); /* unpcklpd */ + /* unpcklpd */ + o(0x140f66); o(0xc0 + REG_VALUE(vtop->r)*9); - o(0x5a0f66); /* cvtpd2ps */ + /* cvtpd2ps */ + o(0x5a0f66); o(0xc0 + REG_VALUE(vtop->r)*9); } else if (tbt == VT_LDOUBLE) { save_reg(RC_ST0); /* movsd %xmm0,-0x10(%rsp) */ o(0x110ff2); o(0x44 + REG_VALUE(vtop->r)*8); - o(0xf024); + o(0xf024); /* [rsp - 10] */ + o(0xf02444dd); /* fldl -0x10(%rsp) */ vtop->r = TREG_ST0; } @@ -2179,7 +2197,8 @@ void gen_cvt_ftoi(int t) } else { assert(0); } - orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */ + /* cvttss2si or cvttsd2si */ + orex(size == 8, r, 0, 0x2c0f); o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8); vtop->r = r; } @@ -2283,20 +2302,20 @@ ST_FUNC void gen_struct_copy(int size) #endif gv2(RC_RDI, RC_RSI); if (n <= 4) { - while (n) - o(0xa548), --n; + for (; n; --n) + o(0xa548); /* movsq */ } else { vpushi(n); gv(RC_RCX); - o(0xa548f3); + o(0xa548f3); /* rep movsq */ vpop(); } if (size & 0x04) - o(0xa5); + o(0xa5); /* movsd */ if (size & 0x02) - o(0xa566); + o(0xa566); /* movsw */ if (size & 0x01) - o(0xa4); + o(0xa4); /* movsb */ #ifdef TCC_TARGET_PE o(0x5e5f); /* pop rdi, rsi */ #endif @@ -2305,6 +2324,6 @@ ST_FUNC void gen_struct_copy(int size) } /* end of x86-64 code generator */ -/*************************************************************/ -#endif /* ! TARGET_DEFS_ONLY */ +/******************************************************/ +#endif /* ! TARGET_DEFS_ONLY */ /******************************************************/ -- 2.51.2 _______________________________________________ Tinycc-devel mailing list [email protected] https://lists.nongnu.org/mailman/listinfo/tinycc-devel
