I noticed that in some places a single instruction was emitted with
several o() calls, because o() takes an unsigned int and so can emit
at most 4 bytes per call. I widened its parameter to uint64_t.

I also added and reworded some comments where needed.

The repo link is https://github.com/usesc/tinycc

---
 tcc.h        |   2 +-
 x86_64-gen.c | 159 ++++++++++++++++++++++++++++-----------------------
 2 files changed, 90 insertions(+), 71 deletions(-)

diff --git a/tcc.h b/tcc.h
index 1c2f6949..87c6f4ec 100644
--- a/tcc.h
+++ b/tcc.h
@@ -1637,7 +1637,7 @@ ST_FUNC void gen_cvt_itof(int t);
 ST_FUNC void gen_cvt_ftof(int t);
 ST_FUNC void ggoto(void);
 #ifndef TCC_TARGET_C67
-ST_FUNC void o(unsigned int c);
+ST_FUNC void o(uint64_t c);
 #endif
 ST_FUNC void gen_vla_sp_save(int addr);
 ST_FUNC void gen_vla_sp_restore(int addr);
diff --git a/x86_64-gen.c b/x86_64-gen.c
index 0e63e685..f853a40f 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -122,7 +122,7 @@ ST_DATA const char * const target_machine_defs =
     "__x86_64__\0"
     "__x86_64\0"
     "__amd64__\0"
-    ;
+;
 
 ST_DATA const int reg_classes[NB_REGS] = {
     /* eax */ RC_INT | RC_RAX,
@@ -181,7 +181,7 @@ ST_FUNC void g(int c)
     ind = ind1;
 }
 
-ST_FUNC void o(unsigned int c)
+ST_FUNC void o(uint64_t c)
 {
     while (c) {
         g(c);
@@ -189,6 +189,9 @@ ST_FUNC void o(unsigned int c)
     }
 }
 
+/* helpers that emit values in little-endian byte order */
+/* (mostly used for immediates and displacements) */
+
 ST_FUNC void gen_le16(int v)
 {
     g(v);
@@ -256,6 +259,17 @@ static int oad(int c, int s)
     return t;
 }
 
+static int oaw(int c, int s)
+{
+    int t;
+    if (nocode_wanted)
+        return s;
+    o(c);
+    t = ind;
+    gen_le16(s);
+    return t;
+}
+
 /* generate jmp to a label */
 #define gjmp2(instr,lbl) oad(instr,lbl)
 
@@ -550,10 +564,12 @@ void load(int r, SValue *sv)
             } else if (r == TREG_ST0) {
                 assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                 /* gen_cvt_ftof(VT_LDOUBLE); */
+
                 /* movsd %xmmN,-0x10(%rsp) */
                 o(0x110ff2);
                 o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                 o(0xf024);
+
                 o(0xf02444dd); /* fldl -0x10(%rsp) */
             } else {
                 orex(is64_type(ft), r, v, 0x89);
@@ -584,8 +600,7 @@ void store(int r, SValue *v)
     if (fr == VT_CONST
         && (v->r & VT_SYM)
         && !(v->sym->type.t & VT_STATIC)) {
-        /* mov xx(%rip), %r11 */
-        o(0x1d8b4c);
+        o(0x1d8b4c); /* mov xx(%rip), %r11 */
         gen_gotpcrel(TREG_R11, v->sym, v->c.i);
         pic = is64_type(bt) ? 0x49 : 0x41;
     }
@@ -626,7 +641,7 @@ void store(int r, SValue *v)
     } else if (op64) {
         if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
             gen_modrm64(op64, r, v->r, v->sym, fc);
-        } else if (fr != r) {
+       } else if (fr != r) {
             orex(1, fr, r, op64);
             o(0xc0 + fr + r * 8); /* mov r, fr */
         }
@@ -652,8 +667,7 @@ static void gcall_or_jmp(int is_jmp)
         /* otherwise, indirect call */
         r = TREG_R11;
         load(r, vtop);
-        o(0x41); /* REX */
-        o(0xff); /* call/jmp *r */
+       o(0xff41); /* REX prefix (0x41) + 0xff: call/jmp *r */
         o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
     }
 }
@@ -712,20 +726,20 @@ static void gen_bounds_epilog(void)
     }
 
     /* generate bound check local freeing */
-    o(0x5250); /* save returned value, if any */
-    o(0x20ec8348); /* sub $32,%rsp */
-    o(0x290f);     /* movaps %xmm0,0x10(%rsp) */
-    o(0x102444);
-    o(0x240c290f); /* movaps %xmm1,(%rsp) */
+    o(0x5250);       /* push %rax; push %rdx; save returned value, if any */
+    o(0x20ec8348);   /* sub $32,%rsp */
+    o(0x102444290f); /* movaps %xmm0,0x10(%rsp) */
+    o(0x240c290f);   /* movaps %xmm1,(%rsp) */
+
     greloca(cur_text_section, sym_data, ind + 3, R_X86_64_PC32, -4);
     o(0x0d8d48 + ((TREG_FASTCALL_1 == TREG_RDI) * 0x300000)); /* lea xxx(%rip), %rcx/rdi */
     gen_le32 (0);
     gen_bounds_call(TOK___bound_local_delete);
-    o(0x280f);     /* movaps 0x10(%rsp),%xmm0 */
-    o(0x102444);
-    o(0x240c280f); /* movaps (%rsp),%xmm1 */
-    o(0x20c48348); /* add $32,%rsp */
-    o(0x585a); /* restore returned value, if any */
+
+    o(0x102444280f); /* movaps 0x10(%rsp),%xmm0 */
+    o(0x240c280f);   /* movaps (%rsp),%xmm1 */
+    o(0x20c48348);   /* add $32,%rsp */
+    o(0x585a);       /* pop %rdx; pop %rax; restore returned value, if any */
 }
 #endif
 
@@ -1042,8 +1056,8 @@ void gfunc_epilog(void)
         greloca(cur_text_section, sym, ind-4, R_X86_64_PLT32, -4);
         o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
     } else {
-        o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
-        o(0xec8148);  /* sub rsp, stacksize */
+        o(0xe5894855);  /* push %rbp; mov %rsp, %rbp */
+        o(0xec8148);    /* sub rsp, stacksize */
         gen_le32(v);
     }
     ind = cur_text_section->data_offset;
@@ -1301,7 +1315,7 @@ void gfunc_call(int nb_args)
               (stack grows down), so the adjustment needs to happen _after_
               an argument that requires it.  */
             if (stack_adjust) {
-               o(0x50); /* push %rax; aka sub $8,%rsp */
+               o(0x50); /* push %rax; AKA sub $8,%rsp */
                 args_size += 8;
                stack_adjust = 0;
             }
@@ -1314,8 +1328,8 @@ void gfunc_call(int nb_args)
        switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                /* allocate the necessary size on stack */
-               o(0x48);
-               oad(0xec81, size); /* sub $xxx, %rsp */
+               oad(0xEC8148, size); /* sub $xxx, %rsp */
+
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
@@ -1334,21 +1348,21 @@ void gfunc_call(int nb_args)
 
            case VT_LDOUBLE:
                 gv(RC_ST0);
-                oad(0xec8148, size); /* sub $xxx, %rsp */
-                o(0x7cdb); /* fstpt 0(%rsp) */
-                g(0x24);
-                g(0x00);
+                oad(0xec8148, size);   /* sub $xxx, %rsp */
+               gen_le32(0x00247cdb);  /* fstpt 0(%rsp) */
                break;
 
            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                r = gv(RC_FLOAT);
-               o(0x50); /* push $rax */
+               o(0x50); /* push %rax */
+
                /* movq %xmmN, (%rsp) */
                o(0xd60f66);
                o(0x04 + REG_VALUE(r)*8);
                o(0x24);
+
                break;
 
            default:
@@ -1420,9 +1434,8 @@ void gfunc_call(int nb_args)
     /* Copy R10 and R11 into RDX and RCX, respectively */
     if (nb_reg_args > 2) {
         o(0xd2894c); /* mov %r10, %rdx */
-        if (nb_reg_args > 3) {
+        if (nb_reg_args > 3) 
             o(0xd9894c); /* mov %r11, %rcx */
-        }
     }
 
     if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */
@@ -1491,19 +1504,15 @@ void gfunc_prolog(Sym *func_sym)
 
         loc -= 24;
         /* movl $0x????????, -0x18(%rbp) */
-        o(0xe845c7);
-        gen_le32(seen_reg_num * 8);
+       oad(0xe845c7, seen_reg_num * 8);
         /* movl $0x????????, -0x14(%rbp) */
-        o(0xec45c7);
-        gen_le32(seen_sse_num * 16 + 48);
+       oad(0xec45c7, seen_sse_num * 16 + 48);
        /* leaq $0x????????, %r11 */
-       o(0x9d8d4c);
-       gen_le32(seen_stack_size);
+       oad(0x9d8d4c, seen_stack_size);
        /* movq %r11, -0x10(%rbp) */
        o(0xf05d894c);
        /* leaq $-200(%rbp), %r11 */
-       o(0x9d8d4c);
-       gen_le32(-176 - 24);
+       oad(0x9d8d4c, -176 - 24);
        /* movq %r11, -0x8(%rbp) */
        o(0xf85d894c);
 
@@ -1515,9 +1524,8 @@ void gfunc_prolog(Sym *func_sym)
                gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            }
             /* movq $0, loc+8(%rbp) */
-            o(0x85c748);
-            gen_le32(loc + 8);
-            gen_le32(0);
+           oad(0x85c748, loc + 8); /* opcode + disp */
+            gen_le32(0);            /* immediate */
         }
         for (i = 0; i < REGN; i++) {
             push_arg_reg(REGN-1-i);
@@ -1606,17 +1614,15 @@ void gfunc_epilog(void)
     if (func_ret_sub == 0) {
         o(0xc3); /* ret */
     } else {
-        o(0xc2); /* ret n */
-        g(func_ret_sub);
-        g(func_ret_sub >> 8);
+       /* ret n */
+       oaw(0xc2, func_ret_sub); 
     }
     /* align local size to word & save local variables */
     v = (-loc + 15) & -16;
     saved_ind = ind;
     ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
-    o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
-    o(0xec8148);  /* sub rsp, stacksize */
-    gen_le32(v);
+    o(0xe5894855);    /* push %rbp; mov %rsp, %rbp */
+    oad(0xec8148, v); /* sub rsp, stacksize */
     ind = saved_ind;
 }
 
@@ -1799,7 +1805,7 @@ void gen_opi(int op)
         vtop--;
         save_reg(TREG_RDX);
         orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
-        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
+        orex(ll, fr, 0, 0xf7);              /* div fr, %eax */
         o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
         if (op == '%' || op == TOK_UMOD)
             r = TREG_RDX;
@@ -1876,8 +1882,8 @@ void gen_opf(int op)
                 o(0xc9d9); /* fxch %st(1) */
             if (op == TOK_EQ || op == TOK_NE)
                 o(0xe9da); /* fucompp */
-            else
-                o(0xd9de); /* fcompp */
+            else 
+               o(0xd9de); /* fcompp */
             o(0xe0df); /* fnstsw %ax */
             if (op == TOK_EQ) {
                 o(0x45e480); /* and $0x45, %ah */
@@ -1922,7 +1928,7 @@ void gen_opf(int op)
             ft = vtop->type.t;
             fc = vtop->c.i;
             o(0xde); /* fxxxp %st, %st(1) */
-            o(0xc1 + (a << 3));
+            o(0xc1 + (a << 3)); /* 2nd byte of fxxxp; 'a' goes in bits 3-5 */
             vtop--;
         }
     } else {
@@ -2050,20 +2056,23 @@ void gen_cvt_itof(int t)
             /* signed long long to float/double/long double (unsigned case
                is handled generically) */
             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
-            o(0x242cdf); /* fildll (%rsp) */
+            o(0x242cdf);   /* fildll (%rsp) */
             o(0x08c48348); /* add $8, %rsp */
         } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                    (VT_INT | VT_UNSIGNED)) {
             /* unsigned int to float/double/long double */
-            o(0x6a); /* push $0 */
+
+           /* push $0 */
+            o(0x6a); 
             g(0x00);
+
             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
-            o(0x242cdf); /* fildll (%rsp) */
+            o(0x242cdf);   /* fildll (%rsp) */
             o(0x10c48348); /* add $16, %rsp */
         } else {
             /* int to float/double/long double */
             o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
-            o(0x2404db); /* fildl (%rsp) */
+            o(0x2404db);   /* fildl (%rsp) */
             o(0x08c48348); /* add $8, %rsp */
         }
         vtop->r = TREG_ST0;
@@ -2076,8 +2085,11 @@ void gen_cvt_itof(int t)
             (vtop->type.t & VT_BTYPE) == VT_LLONG) {
             o(0x48); /* REX */
         }
+
+       /* cvtsi2sd */
         o(0x2a0f);
-        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
+        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8);
+
         vtop->r = r;
     }
 }
@@ -2094,32 +2106,38 @@ void gen_cvt_ftof(int t)
     if (bt == VT_FLOAT) {
         gv(RC_FLOAT);
         if (tbt == VT_DOUBLE) {
-            o(0x140f); /* unpcklps */
+           /* unpcklps */
+            o(0x140f); 
             o(0xc0 + REG_VALUE(vtop->r)*9);
-            o(0x5a0f); /* cvtps2pd */
+           /* cvtps2pd */
+            o(0x5a0f);
             o(0xc0 + REG_VALUE(vtop->r)*9);
         } else if (tbt == VT_LDOUBLE) {
             save_reg(RC_ST0);
             /* movss %xmm0,-0x10(%rsp) */
             o(0x110ff3);
             o(0x44 + REG_VALUE(vtop->r)*8);
-            o(0xf024);
+            o(0xf024);  /* -0x10(%rsp): SIB 0x24, disp8 0xf0 */
+
             o(0xf02444d9); /* flds -0x10(%rsp) */
             vtop->r = TREG_ST0;
         }
     } else if (bt == VT_DOUBLE) {
         gv(RC_FLOAT);
         if (tbt == VT_FLOAT) {
-            o(0x140f66); /* unpcklpd */
+           /* unpcklpd */
+            o(0x140f66); 
             o(0xc0 + REG_VALUE(vtop->r)*9);
-            o(0x5a0f66); /* cvtpd2ps */
+           /* cvtpd2ps */
+            o(0x5a0f66);
             o(0xc0 + REG_VALUE(vtop->r)*9);
         } else if (tbt == VT_LDOUBLE) {
             save_reg(RC_ST0);
             /* movsd %xmm0,-0x10(%rsp) */
             o(0x110ff2);
             o(0x44 + REG_VALUE(vtop->r)*8);
-            o(0xf024);
+            o(0xf024);  /* -0x10(%rsp): SIB 0x24, disp8 0xf0 */
+
             o(0xf02444dd); /* fldl -0x10(%rsp) */
             vtop->r = TREG_ST0;
         }
@@ -2179,7 +2197,8 @@ void gen_cvt_ftoi(int t)
     } else {
         assert(0);
     }
-    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
+    /* cvttss2si or cvttsd2si */
+    orex(size == 8, r, 0, 0x2c0f); 
     o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
     vtop->r = r;
 }
@@ -2283,20 +2302,20 @@ ST_FUNC void gen_struct_copy(int size)
 #endif
     gv2(RC_RDI, RC_RSI);
     if (n <= 4) {
-        while (n)
-            o(0xa548), --n;
+        for (; n; --n)
+            o(0xa548); /* movsq */
     } else {
         vpushi(n);
         gv(RC_RCX);
-        o(0xa548f3);
+        o(0xa548f3); /* rep movsq */
         vpop();
     }
     if (size & 0x04)
-        o(0xa5);
+        o(0xa5);   /* movsd */
     if (size & 0x02)
-        o(0xa566);
+        o(0xa566); /* movsw */
     if (size & 0x01)
-        o(0xa4);
+        o(0xa4);   /* movsb */
 #ifdef TCC_TARGET_PE
     o(0x5e5f); /* pop rdi, rsi */
 #endif
@@ -2305,6 +2324,6 @@ ST_FUNC void gen_struct_copy(int size)
 }
 
 /* end of x86-64 code generator */
-/*************************************************************/
-#endif /* ! TARGET_DEFS_ONLY */
+/******************************************************/
+#endif /* ! TARGET_DEFS_ONLY    */
 /******************************************************/
-- 
2.51.2


_______________________________________________
Tinycc-devel mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/tinycc-devel

Reply via email to