On 09/29/2016 03:05 AM, Richard Henderson wrote: >> hw/nios2/cpu_pic.c | 70 +++ > > Why is this in this patch?
Ah, good catch, moved to 10m50 support patch. >> target-nios2/instruction.c | 1427 >> ++++++++++++++++++++++++++++++++++++++++++++ >> target-nios2/instruction.h | 279 +++++++++ >> target-nios2/translate.c | 242 ++++++++ > > Why are these files separate? Remnant of the old code, will merge them. >> + if (n < 32) /* GP regs */ >> + return gdb_get_reg32(mem_buf, env->regs[n]); >> + else if (n == 32) /* PC */ >> + return gdb_get_reg32(mem_buf, env->regs[R_PC]); >> + else if (n < 49) /* Status regs */ >> + return gdb_get_reg32(mem_buf, env->regs[n - 1]); > > Use checkpatch.pl; the formatting is wrong. Fixed across the entire series, thanks. > There's no particular reason why R_PC needs to be 64; if you change it > to 32, you can simplify this. I believe this is in fact needed, see [1] page 18 (section 2, Register file), quote: " The Nios II architecture supports a flat register file, consisting of thirty-two 32-bit general-purpose integer registers, and up to thirty-two 32-bit control registers. The architecture supports supervisor and user modes that allow system code to protect the control registers from errant applications. " So the CPU supports 32 general purpose regs (R_ZERO..R_RA), then up-to 32 Control registers (CR_STATUS..CR_MPUACC) and then the PC . [1] https://www.altera.com/content/dam/altera-www/global/en_US/pdfs/literature/hb/nios2/n2cpu_nii5v1.pdf >> +struct CPUNios2State { >> + uint32_t regs[NUM_CORE_REGS]; >> + >> + /* Addresses that are hard-coded in the FPGA build settings */ >> + uint32_t reset_addr; >> + uint32_t exception_addr; >> + uint32_t fast_tlb_miss_addr; > > These three should go after ... > >> + >> +#if !defined(CONFIG_USER_ONLY) >> + Nios2MMU mmu; >> + >> + uint32_t irq_pending; >> +#endif >> + >> + CPU_COMMON > > ... here, or even into ... > >> +}; >> + >> +/** >> + * Nios2CPU: >> + * @env: #CPUNios2State >> + * >> + * A Nios2 CPU. >> + */ >> +typedef struct Nios2CPU { >> + /*< private >*/ >> + CPUState parent_obj; >> + /*< public >*/ >> + >> + CPUNios2State env; >> + bool mmu_present; > > ... here. Moved here, it looks more sensible as it's not something one can change at runtime. >> +static inline void t_gen_helper_raise_exception(DisasContext *dc, >> + uint32_t index) > > Remove all of the inline markers and let the compiler decide. Done globally, thanks. >> +static void gen_check_supervisor(DisasContext *dc, TCGLabel *label) >> +{ >> + TCGLabel *l1 = gen_new_label(); >> + >> + TCGv_i32 tmp = tcg_temp_new(); >> + tcg_gen_andi_tl(tmp, dc->cpu_R[CR_STATUS], CR_STATUS_U); >> + tcg_gen_brcond_tl(TCG_COND_EQ, dc->cpu_R[R_ZERO], tmp, l1); >> + t_gen_helper_raise_exception(dc, EXCP_SUPERI); >> + tcg_gen_br(label); > > The supervisor check should be done at translate time by checking > dc->tb->flags & CR_STATUS_U. Fixed. >> +#ifdef CALL_TRACING >> + TCGv_i32 tmp = tcg_const_i32(dc->pc); >> + TCGv_i32 tmp2 = tcg_const_i32((dc->pc & 0xF0000000) | >> (instr->imm26 * 4)); >> + gen_helper_call_status(tmp, tmp2); >> + tcg_temp_free_i32(tmp); >> + tcg_temp_free_i32(tmp2); >> +#endif > > What's the point of this? Seems like some stale debug helper, removed, thanks. >> +/* >> + * I-Type instructions >> + */ >> + >> +/* rB <- 0x000000 : Mem8[rA + @(IMM16)] */ >> +static void ldbu(DisasContext *dc, uint32_t code) >> +{ >> + I_TYPE(instr, code); >> + >> + TCGv addr = tcg_temp_new(); >> + tcg_gen_addi_tl(addr, dc->cpu_R[instr->a], >> + (int32_t)((int16_t)instr->imm16)); >> + >> + tcg_gen_qemu_ld8u(dc->cpu_R[instr->b], addr, dc->mem_idx); >> + >> + tcg_temp_free(addr); >> +} > > You should have helper functions so that you don't have to replicate 12 > line functions. Use the tcg_gen_qemu_ld_tl function, and the TCGMemOp > flags to help merge all load instructions, and all store instructions. Oh nice, thanks. >> +/* rB <- rA + IMM16 */ >> +static void addi(DisasContext *dc, uint32_t code) >> +{ >> + I_TYPE(instr, code); >> + >> + TCGv imm = tcg_temp_new(); >> + tcg_gen_addi_tl(dc->cpu_R[instr->b], dc->cpu_R[instr->a], >> + (int32_t)((int16_t)instr->imm16)); > > The double cast is pointless, as are the extra parenthesis. The int16_t cast is needed as the imm16 is signed value. The int32() cast is indeed pointless, so removed. Thanks. > You're not doing anything to make sure that r0 reads as 0, and ignores > writes. You need to look at target-alpha, target-mips, or target-sparc > to see various ways in which a zero register may be handled. Any hint on this one ? > You should handle the special case of movi, documented as addi rb, r0, > imm, so that the common method of loading a small value does not require > extra tcg opcodes. > > Similarly for the other (common) aliases, like mov and nop, which are > both aliased to add. Fixed >> +/* Initializes the data cache line currently caching address rA + >> @(IMM16) */ >> +static void initda(DisasContext *dc __attribute__((unused)), >> + uint32_t code __attribute__((unused))) >> +{ >> + /* TODO */ >> +} > > This will always be a nop for qemu. Replaced with NOP instructions. >> +static void blt(DisasContext *dc, uint32_t code) >> +{ >> + I_TYPE(instr, code); >> + >> + TCGLabel *l1 = gen_new_label(); >> + >> + tcg_gen_brcond_tl(TCG_COND_LT, dc->cpu_R[instr->a], >> + dc->cpu_R[instr->b], l1); >> + >> + gen_goto_tb(dc, 0, dc->pc + 4); >> + >> + gen_set_label(l1); >> + >> + gen_goto_tb(dc, 1, dc->pc + 4 + (int16_t)(instr->imm16 & 0xFFFC)); >> + >> + dc->is_jmp = DISAS_TB_JUMP; >> +} > > These really require a helper function. Yes, fixed. >> +/* rB <- 0x000000 : Mem8[rA + @(IMM16)] (bypassing cache) */ >> +static void ldbuio(DisasContext *dc, uint32_t code) > > Reuse ldbu; qemu will never implement caches. > >> +/* rB <- (rA * @(IMM16))(31..0) */ >> +static void muli(DisasContext *dc, uint32_t code) >> +{ >> + I_TYPE(instr, code); >> + >> + TCGv imm = tcg_temp_new(); >> + tcg_gen_muli_i32(dc->cpu_R[instr->b], dc->cpu_R[instr->a], >> + (int32_t)((int16_t)instr->imm16)); >> + tcg_temp_free(imm); > > imm is unused. I reworked this some more, so fixed. >> +static const Nios2Instruction i_type_instructions[I_TYPE_COUNT] = { >> + [CALL] = INSTRUCTION(call), >> + [JMPI] = INSTRUCTION(jmpi), >> + [0x02] = INSTRUCTION_ILLEGAL(), >> + [LDBU] = INSTRUCTION(ldbu), >> + [ADDI] = INSTRUCTION(addi), >> + [STB] = INSTRUCTION(stb), >> + [BR] = INSTRUCTION(br), >> + [LDB] = INSTRUCTION(ldb), >> + [CMPGEI] = INSTRUCTION(cmpgei), >> + [0x09] = INSTRUCTION_ILLEGAL(), >> + [0x0a] = INSTRUCTION_ILLEGAL(), >> + [LDHU] = INSTRUCTION(ldhu), >> + [ANDI] = INSTRUCTION(andi), >> + [STH] = INSTRUCTION(sth), >> + [BGE] = INSTRUCTION(bge), >> + [LDH] = INSTRUCTION(ldh), >> + [CMPLTI] = INSTRUCTION(cmplti), >> + [0x11] = INSTRUCTION_ILLEGAL(), >> + [0x12] = INSTRUCTION_ILLEGAL(), >> + [INITDA] = INSTRUCTION(initda), >> + [ORI] = INSTRUCTION(ori), >> + [STW] = INSTRUCTION(stw), >> + [BLT] = INSTRUCTION(blt), >> + [LDW] = INSTRUCTION(ldw), >> + [CMPNEI] = INSTRUCTION(cmpnei), >> + [0x19] = INSTRUCTION_ILLEGAL(), >> + [0x1a] = INSTRUCTION_ILLEGAL(), >> + [FLUSHDA] = INSTRUCTION_NOP(flushda), >> + [XORI] = INSTRUCTION(xori), >> + [0x1d] = INSTRUCTION_ILLEGAL(), >> + [BNE] = INSTRUCTION(bne), >> + [0x1f] = INSTRUCTION_ILLEGAL(), >> + [CMPEQI] = INSTRUCTION(cmpeqi), >> + [0x21] = INSTRUCTION_ILLEGAL(), >> + [0x22] = INSTRUCTION_ILLEGAL(), >> + [LDBUIO] = INSTRUCTION(ldbuio), >> + [MULI] = INSTRUCTION(muli), >> + [STBIO] = INSTRUCTION(stbio), >> + [BEQ] = INSTRUCTION(beq), >> + [LDBIO] = INSTRUCTION(ldbio), >> + [CMPGEUI] = INSTRUCTION(cmpgeui), >> + [0x29] = INSTRUCTION_ILLEGAL(), >> + [0x2a] = INSTRUCTION_ILLEGAL(), >> + [LDHUIO] = INSTRUCTION(ldhuio), >> + [ANDHI] = INSTRUCTION(andhi), >> + [STHIO] = INSTRUCTION(sthio), >> + [BGEU] = INSTRUCTION(bgeu), >> + [LDHIO] = INSTRUCTION(ldhio), >> + [CMPLTUI] = INSTRUCTION(cmpltui), >> + [0x31] = INSTRUCTION_ILLEGAL(), >> + [CUSTOM] = INSTRUCTION_UNIMPLEMENTED(custom), >> + [INITD] = INSTRUCTION(initd), >> + [ORHI] = INSTRUCTION(orhi), >> + [STWIO] = INSTRUCTION(stwio), >> + [BLTU] = INSTRUCTION(bltu), >> + [LDWIO] = INSTRUCTION(ldwio), >> + [RDPRS] = INSTRUCTION_UNIMPLEMENTED(rdprs), >> + [0x39] = INSTRUCTION_ILLEGAL(), >> + [R_TYPE] = { "<R-type instruction>", handle_r_type_instr }, >> + [FLUSHD] = INSTRUCTION_NOP(flushd), >> + [XORHI] = INSTRUCTION(xorhi), >> + [0x3d] = INSTRUCTION_ILLEGAL(), >> + [0x3e] = INSTRUCTION_ILLEGAL(), >> + [0x3f] = INSTRUCTION_ILLEGAL(), > > Is there really any point to using a table, as opposed to a switch > statement? At least with a switch you can use default to catch all of > the illegals at once. I don't need to track the instructions encodings separately, since the NIOS2 only supports 40 I-/R-instructions . >> +/* rC <- ((unsigned)rA * (unsigned)rB))(31..0) */ >> +static void mulxuu(DisasContext *dc, uint32_t code) >> +{ >> + R_TYPE(instr, code); >> + >> + TCGv_i64 t0, t1; >> + >> + t0 = tcg_temp_new_i64(); >> + t1 = tcg_temp_new_i64(); >> + >> + tcg_gen_extu_i32_i64(t0, dc->cpu_R[instr->a]); >> + tcg_gen_extu_i32_i64(t1, dc->cpu_R[instr->b]); >> + tcg_gen_mul_i64(t0, t0, t1); >> + >> + tcg_gen_shri_i64(t0, t0, 32); >> + tcg_gen_extrl_i64_i32(dc->cpu_R[instr->c], t0); > > TCGv t0 = tcg_temp_new(); > tcg_gen_mulu2_tl(t0, dc->cpu_R[instr->c], > dc->cpu_R[instr->a], > dc->cpu_R[instr->b]); > tcg_temp_free(t0); > >> +/* rC <- ((signed)rA * (unsigned)rB))(31..0) */ >> +static void mulxsu(DisasContext *dc, uint32_t code) >> +{ >> + R_TYPE(instr, code); >> + >> + TCGv_i64 t0, t1; >> + >> + t0 = tcg_temp_new_i64(); >> + t1 = tcg_temp_new_i64(); >> + >> + tcg_gen_ext_i32_i64(t0, dc->cpu_R[instr->a]); >> + tcg_gen_extu_i32_i64(t1, dc->cpu_R[instr->b]); >> + tcg_gen_mul_i64(t0, t0, t1); >> + >> + tcg_gen_shri_i64(t0, t0, 32); >> + tcg_gen_extrl_i64_i32(dc->cpu_R[instr->c], t0); > > Similarly, using tcg_gen_mulsu2_tl. To be fair, a patch for this was > posted yesterday, and will be included in a tcg pull soon. See > > https://patchwork.ozlabs.org/patch/675859/ Oh, nice, I picked this patch and replaced all those mulx*() functions. >> +static void callr(DisasContext *dc, uint32_t code) >> +{ >> + R_TYPE(instr, code); >> + >> +#ifdef CALL_TRACING >> + TCGv_i32 tmp = tcg_const_i32(dc->pc); >> + gen_helper_call_status(tmp, dc->cpu_R[instr->a]); >> + tcg_temp_free_i32(tmp); >> +#endif >> + >> + tcg_gen_movi_tl(dc->cpu_R[R_RA], dc->pc + 4); >> + tcg_gen_mov_tl(dc->cpu_R[R_PC], dc->cpu_R[instr->a]); > > Does the hardware actually prevent rA == RA like this? The > documentation is unclear. If the hardware actually performs the > described actions in parallel, then you need to swap these two lines. That would only make sense, so swapped. >> +/* rC <- ((signed)rA * (signed)rB))(31..0) */ >> +static void mulxss(DisasContext *dc, uint32_t code) >> +{ >> + R_TYPE(instr, code); >> + >> + TCGv_i64 t0, t1; >> + >> + t0 = tcg_temp_new_i64(); >> + t1 = tcg_temp_new_i64(); >> + >> + tcg_gen_ext_i32_i64(t0, dc->cpu_R[instr->a]); >> + tcg_gen_ext_i32_i64(t1, dc->cpu_R[instr->b]); >> + tcg_gen_mul_i64(t0, t0, t1); > > Similarly, tcg_gen_muls2_tl. Fixed. >> +/* rC <- rA / rB */ >> +static void divu(DisasContext *dc, uint32_t code) >> +{ >> + R_TYPE(instr, code); >> + >> + gen_helper_divu(dc->cpu_R[instr->c], dc->cpu_R[instr->a], >> + dc->cpu_R[instr->b]); >> +} > > You can perform this inline. C.f. target-mips > > Since divide-by-zero is undefined, this can be done with > > TCGv t0 = tcg_const_tl(0); > tcg_gen_setcond_tl(TCG_COND_EQ, t0, dc->cpu_R[instr->b], t0); > tcg_gen_or_tl(t0, t0, dc->cpu_R[instr->b]); > tcg_gen_divu_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->a], t0); > tcg_temp_free_tl(t0); Just so I get it completely, isn't this handled somehow by the tcg_gen_divu_tl() itself ? >> + gen_helper_divs(dc->cpu_R[instr->c], dc->cpu_R[instr->a], >> + dc->cpu_R[instr->b]); > > Similarly. Fixed >> +/* >> + * bstatus ← status >> + * PIE <- 0 >> + * U <- 0 >> + * ba <- PC + 4 >> + * PC <- break handler address >> + */ >> +static void __break(DisasContext *dc, uint32_t code >> __attribute__((unused))) > > This symbol is in the system namespace. Just use break_insn or something. Fixed >> +static const Nios2Instruction r_type_instructions[R_TYPE_COUNT] = { >> + [0x00] = INSTRUCTION_ILLEGAL(), >> + [ERET] = INSTRUCTION(eret), >> + [ROLI] = INSTRUCTION(roli), >> + [ROL] = INSTRUCTION(rol), >> + [FLUSHP] = INSTRUCTION(flushp), >> + [RET] = INSTRUCTION(ret), >> + [NOR] = INSTRUCTION(nor), >> + [MULXUU] = INSTRUCTION(mulxuu), >> + [CMPGE] = INSTRUCTION(cmpge), >> + [BRET] = INSTRUCTION(bret), >> + [0x0a] = INSTRUCTION_ILLEGAL(), >> + [ROR] = INSTRUCTION(ror), >> + [FLUSHI] = INSTRUCTION(flushi), >> + [JMP] = INSTRUCTION(jmp), >> + [AND] = INSTRUCTION(and), >> + [0x0f] = INSTRUCTION_ILLEGAL(), >> + [CMPLT] = INSTRUCTION(cmplt), >> + [0x11] = INSTRUCTION_ILLEGAL(), >> + [SLLI] = INSTRUCTION(slli), >> + [SLL] = INSTRUCTION(sll), >> + [WRPRS] = INSTRUCTION_UNIMPLEMENTED(wrprs), >> + [0x15] = INSTRUCTION_ILLEGAL(), >> + [OR] = INSTRUCTION(or), >> + [MULXSU] = INSTRUCTION(mulxsu), >> + [CMPNE] = INSTRUCTION(cmpne), >> + [0x19] = INSTRUCTION_ILLEGAL(), >> + [SRLI] = INSTRUCTION(srli), >> + [SRL] = INSTRUCTION(srl), >> + [NEXTPC] = INSTRUCTION(nextpc), >> + [CALLR] = INSTRUCTION(callr), >> + [XOR] = INSTRUCTION(xor), >> + [MULXSS] = INSTRUCTION(mulxss), >> + [CMPEQ] = INSTRUCTION(cmpeq), >> + [0x21] = INSTRUCTION_ILLEGAL(), >> + [0x22] = INSTRUCTION_ILLEGAL(), >> + [0x23] = INSTRUCTION_ILLEGAL(), >> + [DIVU] = INSTRUCTION(divu), >> + [DIV] = { "div", _div }, >> + [RDCTL] = INSTRUCTION(rdctl), >> + [MUL] = INSTRUCTION(mul), >> + [CMPGEU] = INSTRUCTION(cmpgeu), >> + [INITI] = INSTRUCTION(initi), >> + [0x2a] = INSTRUCTION_ILLEGAL(), >> + [0x2b] = INSTRUCTION_ILLEGAL(), >> + [0x2c] = INSTRUCTION_ILLEGAL(), >> + [TRAP] = INSTRUCTION(trap), >> + [WRCTL] = INSTRUCTION(wrctl), >> + [0x2f] = INSTRUCTION_ILLEGAL(), >> + [CMPLTU] = INSTRUCTION(cmpltu), >> + [ADD] = INSTRUCTION(add), >> + [0x32] = INSTRUCTION_ILLEGAL(), >> + [0x33] = INSTRUCTION_ILLEGAL(), >> + [BREAK] = { "break", __break }, >> + [0x35] = INSTRUCTION_ILLEGAL(), >> + [SYNC] = INSTRUCTION(nop), >> + [0x37] = INSTRUCTION_ILLEGAL(), >> + [0x38] = INSTRUCTION_ILLEGAL(), >> + [SUB] = INSTRUCTION(sub), >> + [SRAI] = INSTRUCTION(srai), >> + [SRA] = INSTRUCTION(sra), >> + [0x3c] = INSTRUCTION_ILLEGAL(), >> + [0x3d] = INSTRUCTION_ILLEGAL(), >> + [0x3e] = INSTRUCTION_ILLEGAL(), >> + [0x3f] = INSTRUCTION_ILLEGAL(), >> +}; > > Similar comment wrt the I-type table. > >> +const char *instruction_get_string(uint32_t code) >> +{ >> + uint32_t op = get_opcode(code); >> + >> + if (unlikely(op >= I_TYPE_COUNT)) { >> + return ""; >> + } else if (op == R_TYPE) { >> + uint32_t opx = get_opxcode(code); >> + if (unlikely(opx >= R_TYPE_COUNT)) { >> + return ""; >> + } >> + return r_type_instructions[opx].name; >> + } else { >> + return i_type_instructions[op].name; >> + } >> +} > > What is this function used for? Nothing, removed. >> +/* I-Type instruction */ >> +typedef struct Nios2IType { >> + uint32_t op:6; >> + uint32_t imm16:16; >> + uint32_t b:5; >> + uint32_t a:5; >> +} QEMU_PACKED Nios2IType; > > These bitfields are a non-starter. Layout of them is non-portable in > more ways than is worth going into here. You must use extract32 and > sextract32 to extract the fields. Fixed >> + >> +union i_type_u { >> + uint32_t v; >> + Nios2IType i; >> +}; >> + >> +#define I_TYPE(instr, op) \ >> + union i_type_u instr_u = { .v = op }; \ >> + Nios2IType *instr = &instr_u.i > > You could probably still hide everything behind this macro, with an > inline function returning a structure (defined *without* bitfields). Yes, fixed. >> +/* >> + * Return values for instruction handlers >> + */ >> +#define INSTR_UNIMPL -2 /* Unimplemented instruction */ >> +#define INSTR_ERR -1 /* Error in instruction */ >> +#define PC_INC_NORMAL 0 /* Normal PC increment after instruction */ >> +#define PC_INC_BY_INSTR 1 /* PC got incremented by instruction */ >> +#define INSTR_BREAK 2 /* Break encountered */ >> +#define INSTR_EXCEPTION 255 /* Instruction generated an exception >> + (the exception cause will be stored >> + in struct nios2 */ > > What's the gain over using a simple enum with sequential values? These are unused, so removed. >> +/* >> + * FIXME: Convert to VMstate >> + */ >> + >> +#include "qemu/osdep.h" >> +#include "hw/hw.h" >> +#include "hw/boards.h" >> + >> +void cpu_save(QEMUFile *f, void *opaque) >> +{ >> + /* TODO */ >> +} >> + >> +int cpu_load(QEMUFile *f, void *opaque, int version_id) >> +{ >> + /* TODO */ >> + return 0; >> +} > > Fixing this is no longer optional. This file wasn't even compiled, so removed. >> +void helper_memalign(CPUNios2State *env, uint32_t addr, uint32_t dr, >> uint32_t wr, uint32_t mask) >> +{ >> + if (addr & mask) { >> + qemu_log("unaligned access addr=%x mask=%x, wr=%d dr=r%d\n", >> + addr, mask, wr, dr); >> + env->regs[CR_BADADDR] = addr; >> + env->regs[CR_EXCEPTION] = EXCP_UNALIGN << 2; >> + helper_raise_exception(env, EXCP_UNALIGN); >> + } >> +} > > What is this doing that cc->do_unaligned_access doesn't? Switched to do_unaligned_access, thanks. >> + /* Initialize DC */ >> + dc->cpu_env = cpu_env; >> + dc->cpu_R = cpu_R; > > What is this assignment for? Are you planning to implement shadow > registers at some point? Eventually yes, but so far I haven't seen that used at all. > r~ Thanks for the review! -- Best regards, Marek Vasut