Hi,

I made a patch for Extended Asm support for RISC-V.

I haven't tested it further because I have a couple of questions about how it should work, mostly about the `subst_asm_operand` and `constraint_priority` functions. It works for my test cases, but I'm not sure it's the best implementation. If anyone could review it, that would be awesome.

Thanks,
Ekaitz


From 97638d3e6a9b12bd7a209e16d28307b20edddfbb Mon Sep 17 00:00:00 2001
From: Ekaitz Zarraga <eka...@elenq.tech>
Date: Sun, 17 Mar 2024 16:07:04 +0100
Subject: [PATCH] riscv: Add extended assembly support

NOTE: In order to tell general-purpose registers from floating-point
registers, this commit adds a flag in the 6th bit (0x20) of the register
index. If it is set, the register is a floating-point one. This affects
the whole assembler.
---
 riscv64-asm.c | 509 +++++++++++++++++++++++++++++++++++++++++++++++++-
 riscv64-gen.c |   1 -
 tccasm.c      |   3 +
 3 files changed, 503 insertions(+), 10 deletions(-)

diff --git a/riscv64-asm.c b/riscv64-asm.c
index 11a488ae..22b09c0c 100644
--- a/riscv64-asm.c
+++ b/riscv64-asm.c
@@ -7,7 +7,8 @@
 #ifdef TARGET_DEFS_ONLY

 #define CONFIG_TCC_ASM
-#define NB_ASM_REGS 32
+/* 32 general purpose + 32 floating point registers */
+#define NB_ASM_REGS 64

 ST_FUNC void g(int c);
 ST_FUNC void gen_le16(int c);
@@ -24,11 +25,15 @@ enum {
     OPT_IM12S,
     OPT_IM32,
 };
-#define C_ENCODE_RS1(register_index) ((register_index) << 7)
-#define C_ENCODE_RS2(register_index) ((register_index) << 2)
-#define ENCODE_RD(register_index) ((register_index) << 7)
-#define ENCODE_RS1(register_index) ((register_index) << 15)
-#define ENCODE_RS2(register_index) ((register_index) << 20)
+// Registers go from 0 to 31. We use next bit to choose general/float
+#define REG_FLOAT_MASK 0x20
+#define REG_IS_FLOAT(register_index) ((register_index) & REG_FLOAT_MASK)
+#define REG_VALUE(register_index) ((register_index) & (REG_FLOAT_MASK-1))
+#define C_ENCODE_RS1(register_index) (REG_VALUE(register_index) << 7)
+#define C_ENCODE_RS2(register_index) (REG_VALUE(register_index) << 2)
+#define ENCODE_RD(register_index)  (REG_VALUE(register_index) << 7)
+#define ENCODE_RS1(register_index) (REG_VALUE(register_index) << 15)
+#define ENCODE_RS2(register_index) (REG_VALUE(register_index) << 20)
 #define NTH_BIT(b, n) ((b >> n) & 1)
 #define OP_IM12S (1 << OPT_IM12S)
 #define OP_IM32 (1 << OPT_IM32)
@@ -1327,7 +1332,84 @@ static int asm_parse_csrvar(int t)

 ST_FUNC void subst_asm_operand(CString *add_str, SValue *sv, int modifier)
 {
-    tcc_error("RISCV64 asm not implemented.");
+    /* Append the assembler text for operand 'sv' to 'add_str'.
+         add_str   string being built, only ever appended to
+         sv        operand value; sv->r selects the form that is printed
+         modifier  operand modifier character taken from the asm
+                   template; only 'n' and 'z' are acted upon here
+       Forms:
+         VT_CONST  symbol name and/or integer value; 'n' prints the
+                   negated value and 'z' prints "zero" for the value 0
+         VT_LOCAL  the offset sv->c.i as a decimal integer
+         other     the name of register (sv->r & VT_VALMASK), f<n> for
+                   float/double operands and x<n> for everything else
+       NOTE(review): indices with the float flag (0x20) set may reach
+       VT_CONST and would then not take the register path -- confirm
+       against the value of VT_CONST in tcc.h. */
+    int r, reg, val;
+    char buf[64];
+
+    r = sv->r;
+    if ((r & VT_VALMASK) == VT_CONST) {
+        if (r & VT_SYM) {
+            const char *name = get_tok_str(sv->sym->v, NULL);
+            if (sv->sym->v >= SYM_FIRST_ANOM) {
+                /* In case of anonymous symbols ("L.42", used
+                   for static data labels) we can't find them
+                   in the C symbol table when later looking up
+                   this name.  So enter them now into the asm label
+                   list when we still know the symbol.  */
+                get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym);
+            }
+            if (tcc_state->leading_underscore)
+                cstr_ccat(add_str, '_');
+            cstr_cat(add_str, name, -1);
+            if ((uint32_t) sv->c.i == 0)
+                goto no_offset;
+            cstr_ccat(add_str, '+');
+        }
+        val = sv->c.i;
+        if (modifier == 'n')
+            val = -val;
+        if (modifier == 'z' && sv->c.i == 0) {
+            cstr_cat(add_str, "zero", -1);
+        } else {
+            /* print 'val', not sv->c.i, so the 'n' negation takes effect */
+            snprintf(buf, sizeof(buf), "%d", val);
+            cstr_cat(add_str, buf, -1);
+        }
+      no_offset:;
+    } else if ((r & VT_VALMASK) == VT_LOCAL) {
+        snprintf(buf, sizeof(buf), "%d", (int) sv->c.i);
+        cstr_cat(add_str, buf, -1);
+    } else {
+        /* register case, with or without VT_LVAL: both forms print the
+           bare register name */
+        reg = r & VT_VALMASK;
+        if (reg >= VT_CONST)
+            tcc_internal_error("");
+        if ((sv->type.t & VT_BTYPE) == VT_FLOAT ||
+            (sv->type.t & VT_BTYPE) == VT_DOUBLE) {
+            /* floating point register: asm_parse_regvar() numbers f<n>
+               as 32+n, so drop the float flag before indexing from
+               TOK_ASM_f0 */
+            reg = TOK_ASM_f0 + REG_VALUE(reg);
+        } else {
+            /* general purpose register */
+            reg = TOK_ASM_x0 + reg;
+        }
+        snprintf(buf, sizeof(buf), "%s", get_tok_str(reg, NULL));
+        cstr_cat(add_str, buf, -1);
+    }
+}
+
+/* TCC does not use RISC-V register numbers internally: a0-a7 (x10-x17)
+ * are numbered 0-7 and fa0-fa7 (f10-f17) are numbered 8-15 instead */
+static int tcc_ireg(int r){
+    return REG_VALUE(r) - 10;
+}
+static int tcc_freg(int r){
+    return REG_VALUE(r) - 10 + 8;
 }

 /* generate prolog and epilog code for asm statement */
@@ -1336,13 +1418,422 @@ ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
                          uint8_t *clobber_regs,
                          int out_reg)
 {
+    uint8_t regs_allocated[NB_ASM_REGS];
+    ASMOperand *op;
+    int i, reg;
+
+    static const uint8_t reg_saved[] = {
+        // General purpose regs: x8, x9, x18-x27
+        8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+        // Float regs (REG_FLOAT_MASK set): f8, f9, f18-f27
+        40, 41, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59
+    };
+
+    /* mark all used registers: the clobbers plus every operand register */
+    memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
+    for(i = 0; i < nb_operands; i++) {
+        op = &operands[i];
+        if (op->reg >= 0) {
+            regs_allocated[op->reg] = 1;
+        }
+    }
+
+    if(!is_output) {
+        /* generate reg save code: push each used register of reg_saved */
+        for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) {
+            reg = reg_saved[i];
+            if (regs_allocated[reg]) {
+                /* push */
+                /* addi sp, sp, -8 */
+                gen_le32((4 << 2) | 3 |
+                        ENCODE_RD(2) | ENCODE_RS1(2) | -8 << 20);
+                if (REG_IS_FLOAT(reg)){
+                    /* fsd reg, 0(sp) */
+                    gen_le32( 0x27 | (3 << 12) |
+                            ENCODE_RS2(reg) | ENCODE_RS1(2) );
+                } else {
+                    /* sd reg, 0(sp) */
+                    gen_le32((0x8 << 2) | 3 | (3 << 12) |
+                            ENCODE_RS2(reg) | ENCODE_RS1(2) );
+                }
+            }
+        }
+
+        /* generate load code */
+        for(i = 0; i < nb_operands; i++) {
+            op = &operands[i];
+            if (op->reg >= 0) {
+                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
+                    op->is_memory) {
+                    /* memory reference case (for both input and
+                       output cases) */
+                    SValue sv;
+                    sv = *op->vt;
+                    sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
+                    sv.type.t = VT_PTR;
+                    load(tcc_ireg(op->reg), &sv);
+                } else if (i >= nb_outputs || op->is_rw) {
+                    /* load value in register */
+                    if ((op->vt->type.t & VT_BTYPE) == VT_FLOAT ||
+                        (op->vt->type.t & VT_BTYPE) == VT_DOUBLE) {
+                        load(tcc_freg(op->reg), op->vt);
+                    } else {
+                        load(tcc_ireg(op->reg), op->vt);
+                    }
+                    if (op->is_llong) {
+                        tcc_error("long long not implemented");
+                    }
+                }
+            }
+        }
+    } else {
+        /* generate save code */
+        for(i = 0 ; i < nb_outputs; i++) {
+            op = &operands[i];
+            if (op->reg >= 0) {
+                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
+                    if (!op->is_memory) {
+                        SValue sv;
+                        sv = *op->vt;
+                        sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
+                        sv.type.t = VT_PTR;
+                        load(tcc_ireg(out_reg), &sv);
+
+                        sv = *op->vt;
+                        sv.r = (sv.r & ~VT_VALMASK) | out_reg;
+                        store(tcc_ireg(op->reg), &sv);
+                    }
+                } else {
+                    if ((op->vt->type.t & VT_BTYPE) == VT_FLOAT ||
+                        (op->vt->type.t & VT_BTYPE) == VT_DOUBLE) {
+                        store(tcc_freg(op->reg), op->vt);
+                    } else {
+                        store(tcc_ireg(op->reg), op->vt);
+                    }
+                    if (op->is_llong) {
+                        tcc_error("long long not implemented");
+                    }
+                }
+            }
+        }
+        /* generate reg restore code, in reverse order of the saves */
+        for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) {
+            reg = reg_saved[i];
+            if (regs_allocated[reg]) {
+                /* pop */
+                if (REG_IS_FLOAT(reg)){
+                    /* fld reg, 0(sp) */
+                    gen_le32(7 | (3 << 12) |
+                            ENCODE_RD(reg) | ENCODE_RS1(2) | 0);
+                } else {
+                    /* ld reg, 0(sp) */
+                    gen_le32(3 | (3 << 12) |
+                            ENCODE_RD(reg) | ENCODE_RS1(2) | 0);
+                }
+                /* addi sp, sp, 8 */
+                gen_le32((4 << 2) | 3 |
+                        ENCODE_RD(2) | ENCODE_RS1(2) | 8 << 20);
+            }
+        }
+    }
+}
+
+/* return the constraint priority: asm_compute_constraints() allocates
+   the operands with the lowest numbers first */
+static inline int constraint_priority(const char *str)
+{
+    // 3 is used for the register/address letters, 4 for immediates/memory
+    int priority, c, pr;
+
+    /* with several letters, keep the largest (last allocated) value */
+    priority = 0;
+    for(;;) {
+        c = *str;
+        if (c == '\0')
+            break;
+        str++;
+        switch(c) {
+        case 'A': // address that is held in a general-purpose register.
+        case 'S': // constraint that matches an absolute symbolic address.
+        case 'f': // register [float]
+        case 'r': // register [general]
+        case 'p': // valid memory address for load,store [general]
+            pr = 3;
+            break;
+        case 'I': // 12 bit signed immediate
+        case 'i': // immediate integer operand, including symbolic constants [general]
+        case 'm': // memory operand [general]
+        case 'g': // general-purpose-register, memory, immediate integer [general]
+            pr = 4;
+            break;
+        case 'v':
+            tcc_error("unimp: vector constraints");
+            pr = 0;
+            break;
+        default:
+            tcc_error("unknown constraint '%c'", c);
+            pr = 0;
+        }
+        if (pr > priority)
+            priority = pr;
+    }
+    return priority;
 }

+static const char *skip_constraint_modifiers(const char *p)
+{
+    /* Step over the leading modifier characters of a constraint string
+       and return a pointer to the first character that is not one.
+       Constraint modifiers:
+        =   operand is written to by this instruction
+        +   operand is both read and written to by this instruction
+        %   instruction is commutative for this operand and the next one
+       Per-alternative constraint modifier:
+        &   operand is clobbered before the instruction is done using
+            the input operands
+    */
+    return p + strspn(p, "=&+%");
+}
+
+#define REG_OUT_MASK 0x01
+#define REG_IN_MASK  0x02
+
+#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
+
 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
                                     int nb_operands, int nb_outputs,
                                     const uint8_t *clobber_regs,
                                     int *pout_reg)
 {
+    /* Pick a register (op->reg, RISC-V number plus float flag) for every
+       operand whose constraint needs one, and set *pout_reg to a scratch
+       register if an output has to be stored through a VT_LLOCAL pointer. */
+    /* TODO: Simple constraints
+        whitespace  ignored
+        o  memory operand that is offsetable
+        V  memory but not offsetable
+        <  memory operand with autodecrement addressing is allowed. Restrictions apply.
+        >  memory operand with autoincrement addressing is allowed. Restrictions apply.
+        n  immediate integer operand with a known numeric value
+        E  immediate floating operand (const_double) is allowed, but only if target=host
+        F  immediate floating operand (const_double or const_vector) is allowed
+        s  immediate integer operand whose value is not an explicit integer
+        X  any operand whatsoever
+        0...9 (postfix); (can also be more than 1 digit number); an operand that matches the specified operand number is allowed
+    */
+
+    /* TODO: RISCV constraints
+        J   The integer 0.
+        K   A 5-bit unsigned immediate for CSR access instructions.
+        A   An address that is held in a general-purpose register.
+        S   A constraint that matches an absolute symbolic address.
+        vr  A vector register (if available).
+        vd  A vector register, excluding v0 (if available).
+        vm  A vector register, only v0 (if available).
+    */
+    ASMOperand *op;
+    int sorted_op[MAX_ASM_OPERANDS];
+    int i, j, k, p1, p2, tmp, reg, c, reg_mask;
+    const char *str;
+    uint8_t regs_allocated[NB_ASM_REGS];
+
+    /* init fields */
+    for (i = 0; i < nb_operands; i++) {
+        op = &operands[i];
+        op->input_index = -1;
+        op->ref_index = -1;
+        op->reg = -1;
+        op->is_memory = 0;
+        op->is_rw = 0;
+    }
+    /* compute constraint priority and evaluate references to output
+       constraints if input constraints */
+    for (i = 0; i < nb_operands; i++) {
+        op = &operands[i];
+        str = op->constraint;
+        str = skip_constraint_modifiers(str);
+        if (isnum(*str) || *str == '[') {
+            /* this is a reference to another constraint */
+            k = find_constraint(operands, nb_operands, str, NULL);
+            if ((unsigned) k >= i || i < nb_outputs)
+                tcc_error("invalid reference in constraint %d ('%s')",
+                          i, str);
+            op->ref_index = k;
+            if (operands[k].input_index >= 0)
+                tcc_error("cannot reference twice the same operand");
+            operands[k].input_index = i;
+            op->priority = 5;
+        } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
+                   && op->vt->sym
+                   && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
+            op->priority = 1;
+            op->reg = reg;
+        } else {
+            op->priority = constraint_priority(str);
+        }
+    }
+
+    /* sort operands according to their priority */
+    for (i = 0; i < nb_operands; i++)
+        sorted_op[i] = i;
+    for (i = 0; i < nb_operands - 1; i++) {
+        for (j = i + 1; j < nb_operands; j++) {
+            p1 = operands[sorted_op[i]].priority;
+            p2 = operands[sorted_op[j]].priority;
+            if (p2 < p1) {
+                tmp = sorted_op[i];
+                sorted_op[i] = sorted_op[j];
+                sorted_op[j] = tmp;
+            }
+        }
+    }
+
+    for (i = 0; i < NB_ASM_REGS; i++) {
+        if (clobber_regs[i])
+            regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
+        else
+            regs_allocated[i] = 0;
+    }
+
+    /* allocate registers and generate corresponding asm moves */
+    for (i = 0; i < nb_operands; i++) {
+        j = sorted_op[i];
+        op = &operands[j];
+        str = op->constraint;
+        /* no need to allocate references */
+        if (op->ref_index >= 0)
+            continue;
+        /* select if register is used for output, input or both */
+        if (op->input_index >= 0) {
+            reg_mask = REG_IN_MASK | REG_OUT_MASK;
+        } else if (j < nb_outputs) {
+            reg_mask = REG_OUT_MASK;
+        } else {
+            reg_mask = REG_IN_MASK;
+        }
+        if (op->reg >= 0) {
+            if (is_reg_allocated(op->reg))
+                tcc_error("asm regvar requests register that's taken already");
+            reg = op->reg;
+            goto reg_found;
+        }
+      try_next:
+        c = *str++;
+        switch (c) {
+        case '=': // Operand is written-to
+            goto try_next;
+        case '+': // Operand is both READ and written-to
+            op->is_rw = 1;
+            /* FALL THRU */
+        case '&': // Operand is clobbered before the instruction is done using the input operands
+            if (j >= nb_outputs)
+                tcc_error("'%c' modifier can only be applied to outputs", c);
+            reg_mask = REG_IN_MASK | REG_OUT_MASK;
+            goto try_next;
+        case 'r': // general-purpose register
+        case 'p': // loadable/storable address
+            /* any general register from a0 to a7, i.e. x10 to x17 */
+            for (reg = 10; reg <= 17; reg++) {
+                if (!is_reg_allocated(reg))
+                    goto reg_found;
+            }
+            goto try_next;
+          reg_found:
+            /* now we can reload in the register */
+            op->is_llong = 0;
+            op->reg = reg;
+            regs_allocated[reg] |= reg_mask;
+            break;
+        case 'f': // floating point register
+            /* from fa0 to fa7, i.e. f10 to f17 with the float flag set */
+            for (reg = 42; reg <= 49; reg++) {
+                if (!is_reg_allocated(reg))
+                    goto reg_found;
+            }
+            goto try_next;
+        case 'I': // I-Type 12 bit signed immediate
+        case 'i': // immediate integer operand, including symbolic constants
+            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
+                goto try_next;
+            break;
+        case 'm': // memory operand
+        case 'g': // general register, memory or immediate integer
+            /* nothing special to do because the operand is already in
+               memory, except if the pointer itself is stored in a
+               memory variable (VT_LLOCAL case) */
+            /* XXX: fix constant case */
+            /* if it is a reference to a memory zone, it must lie
+               in a register, so we reserve the register in the
+               input registers and a load will be generated
+               later */
+            if (j < nb_outputs || c == 'm') {
+                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
+                    /* any general register: from a0 to a7 */
+                    for (reg = 10; reg <= 17; reg++) {
+                        if (!(regs_allocated[reg] & REG_IN_MASK))
+                            goto reg_found1;
+                    }
+                    goto try_next;
+                  reg_found1:
+                    /* now we can reload in the register */
+                    regs_allocated[reg] |= REG_IN_MASK;
+                    op->reg = reg;
+                    op->is_memory = 1;
+                }
+            }
+            break;
+        default:
+            tcc_error("asm constraint %d ('%s') could not be satisfied",
+                      j, op->constraint);
+            break;
+        }
+        /* if a reference is present for that operand, we assign it too */
+        if (op->input_index >= 0) {
+            operands[op->input_index].reg = op->reg;
+            operands[op->input_index].is_llong = op->is_llong;
+        }
+    }
+
+    /* compute out_reg. It is used to store outputs registers to memory
+       locations references by pointers (VT_LLOCAL case) */
+    *pout_reg = -1;
+    for (i = 0; i < nb_operands; i++) {
+        op = &operands[i];
+        if (op->reg >= 0 &&
+            (op->vt->r & VT_VALMASK) == VT_LLOCAL && !op->is_memory) {
+            if (REG_IS_FLOAT(op->reg)){
+                /* From fa0 to fa7 */
+                for (reg = 42; reg <= 49; reg++) {
+                    if (!(regs_allocated[reg] & REG_OUT_MASK))
+                        goto reg_found2;
+                }
+            } else {
+                /* From a0 to a7 */
+                for (reg = 10; reg <= 17; reg++) {
+                    if (!(regs_allocated[reg] & REG_OUT_MASK))
+                        goto reg_found2;
+                }
+            }
+            tcc_error("could not find free output register for reloading");
+          reg_found2:
+            *pout_reg = reg;
+            break;
+        }
+    }
+
+    /* print sorted constraints */
+#ifdef ASM_DEBUG
+    for (i = 0; i < nb_operands; i++) {
+        j = sorted_op[i];
+        op = &operands[j];
+        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
+               j,
+               op->id ? get_tok_str(op->id, NULL) : "",
+               op->constraint, op->vt->r, op->reg);
+    }
+    if (*pout_reg >= 0)
+        printf("out_reg=%d\n", *pout_reg);
+#endif
 }

 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
@@ -1372,13 +1863,13 @@ ST_FUNC int asm_parse_regvar (int t)
         return t - TOK_ASM_x0;

     if (t < TOK_ASM_zero)
-        return t - TOK_ASM_f0;
+        return t - TOK_ASM_f0 + 32; // Use higher 32 for floating point

     /* ABI mnemonic */
     if (t < TOK_ASM_ft0)
         return t - TOK_ASM_zero;

-    return t - TOK_ASM_ft0;
+    return t - TOK_ASM_ft0 + 32; // Use higher 32 for floating point
 }

 /*************************************************************/
diff --git a/riscv64-gen.c b/riscv64-gen.c
index 46971b1a..de48b5bc 100644
--- a/riscv64-gen.c
+++ b/riscv64-gen.c
@@ -2,7 +2,6 @@

 // Number of registers available to allocator:
 #define NB_REGS 19 // x10-x17 aka a0-a7, f10-f17 aka fa0-fa7, xxx, ra, sp
-#define NB_ASM_REGS 32
 #define CONFIG_TCC_ASM

 #define TREG_R(x) (x) // x = 0..7
diff --git a/tccasm.c b/tccasm.c
index d1c48e8c..ba7ffe97 100644
--- a/tccasm.c
+++ b/tccasm.c
@@ -1115,6 +1115,9 @@ static void subst_asm_operands(ASMOperand *operands, int nb_operands,
             if (*str == 'c' || *str == 'n' ||
*str == 'b' || *str == 'w' || *str == 'h' || *str == 'k' ||
                *str == 'q' || *str == 'l' ||
+#ifdef TCC_TARGET_RISCV64
+               *str == 'z' ||
+#endif
                /* P in GCC would add "@PLT" to symbol refs in PIC mode,
                   and make literal operands not be decorated with '$'.  */
                *str == 'P')
--
2.41.0


_______________________________________________
Tinycc-devel mailing list
Tinycc-devel@nongnu.org
https://lists.nongnu.org/mailman/listinfo/tinycc-devel

Reply via email to