Thanks for the feedback. I'll see about getting things updated once I have the chance. I wasn't too terribly concerned with performance as the target processor being emulated is much slower than the host (about 100MHz). Even the way things are now, Linux runs several times faster in emulation that it does on the actual target CPU.
Many of the optimizations you point out look pretty simple to incorporate so I will definitely see about doing that. I'll review the places you've pointed out where the translation vs. execution time CPU state is being mis-handled. It seems to be working, but those are definitely the types of things that will introduce hard to find bugs. -- Chris Wulff > > +static void gen_check_supervisor(DisasContext *dc, int label) > > +{ > > + int l1 = gen_new_label(); > > + > > + TCGv_i32 tmp = tcg_temp_new(); > > + tcg_gen_andi_tl(tmp, dc->cpu_R[CR_STATUS], CR_STATUS_U); > > + tcg_gen_brcond_tl(TCG_COND_EQ, dc->cpu_R[R_ZERO], tmp, l1); > > + t_gen_helper_raise_exception(dc, EXCP_SUPERI); > > + tcg_gen_br(label); > > + > > + gen_set_label(l1); > > + tcg_temp_free_i32(tmp); > > + > > + /* If we aren't taking the exception, update the PC to the > > + * next instruction */ > > + tcg_gen_movi_tl(dc->cpu_R[R_PC], dc->pc+4); > > You probably don't want to do that. It's quite expensive to have a > branch in every priviledged instruction. > > Given that CR_STATUS_U is use for the flags in cpu_get_tb_cpu_state(), > for a given value of CR_STATUS_U, only TB with a corresponding > CR_STATUS_U will be reused. > > For that you should copy tb->flags in DisasContext and use that instead. > > > +} > > + > > +static inline void gen_load_u(DisasContext *dc, TCGv dst, TCGv addr, > > + unsigned int size) > > +{ > > + int mem_index = cpu_mmu_index(dc->env); > > In general you should not use env in instructions.c, unless it is for > accessing a value that doesn't change during the execution (for example > CPU features/capabilities), as there is no guarantee that the value will > be the same when the TB is actually executed. > > In this case it should work correctly because cpu_mmu_index() only > access CR_STATUS_U which is in the TB flags, but it is technically > wrong. Either make cpu_mmu_index check the tb flags (see above to add > them in DisasContext), or add a mem_index flag in DisasContext, computed > from TB flags at the beginning of gen_intermediate_code_internal. > > In the latter case, you might even remove these functions and directly > call tcg_gen_qemu_ldXXu(dst, addr, ctx->mem_index) when needed. > > > > + > > + switch (size) { > > + case 1: > > + tcg_gen_qemu_ld8u(dst, addr, mem_index); > > + break; > > + case 2: > > + tcg_gen_qemu_ld16u(dst, addr, mem_index); > > + break; > > + case 4: > > + tcg_gen_qemu_ld32u(dst, addr, mem_index); > > + break; > > + default: > > + cpu_abort(dc->env, "Incorrect load size %d\n", size); > > + break; > > + } > > +} > > + > > +static inline void gen_load_s(DisasContext *dc, TCGv dst, TCGv addr, > > + unsigned int size) > > +{ > > + int mem_index = cpu_mmu_index(dc->env); > > + > > Ditto > > > + switch (size) { > > + case 1: > > + tcg_gen_qemu_ld8s(dst, addr, mem_index); > > + break; > > + case 2: > > + tcg_gen_qemu_ld16s(dst, addr, mem_index); > > + break; > > + case 4: > > + tcg_gen_qemu_ld32s(dst, addr, mem_index); > > + break; > > + default: > > + cpu_abort(dc->env, "Incorrect load size %d\n", size); > > + break; > > + } > > +} > > + > > +static inline void gen_store(DisasContext *dc, TCGv val, TCGv addr, > > + unsigned int size) > > +{ > > + int mem_index = cpu_mmu_index(dc->env); > > + > > Ditto > > > + switch (size) { > > + case 1: > > + tcg_gen_qemu_st8(val, addr, mem_index); > > + break; > > + case 2: > > + tcg_gen_qemu_st16(val, addr, mem_index); > > + break; > > + case 4: > > + tcg_gen_qemu_st32(val, addr, mem_index); > > + break; > > + default: > > + cpu_abort(dc->env, "Incorrect load size %d\n", size); > > + break; > > + } > > +} > > + > > +/* > > + * Used as a placeholder for all instructions which do not have an > effect on the > > + * simulator (e.g. flush, sync) > > + */ > > +static void nop(DisasContext *dc __attribute__((unused)), > > + uint32_t code __attribute__((unused))) > > +{ > > + /* Nothing to do here */ > > +} > > + > > +/* > > + * J-Type instructions > > + */ > > + > > +/* > > + * ra <- PC + 4 > > + * PC <- (PC(31..28) : IMM26 * 4) > > + */ > > +static void call(DisasContext *dc, uint32_t code) > > +{ > > + J_TYPE(instr, code); > > + > > +#ifdef CALL_TRACING > > + TCGv_i32 tmp = tcg_const_i32(dc->pc); > > + TCGv_i32 tmp2 = tcg_const_i32((dc->pc & 0xF0000000) | (instr->imm26 > * 4)); > > + gen_helper_call_status(tmp, tmp2); > > + tcg_temp_free_i32(tmp); > > + tcg_temp_free_i32(tmp2); > > +#endif > > + > > + tcg_gen_movi_tl(dc->cpu_R[R_RA], dc->pc + 4); > > + tcg_gen_movi_tl(dc->cpu_R[R_PC], > > + (dc->pc & 0xF0000000) | (instr->imm26 * 4)); > > + > > + dc->is_jmp = DISAS_JUMP; > > +} > > + > > You probably want to add some tcg_gen_goto_tb() for static jumps, so > that TB linking is possible. It greatly improves the speed of the > emulation. > > > > +/* PC <- (PC(31..28) : IMM26 * 4) */ > > +static void jmpi(DisasContext *dc, uint32_t code) > > +{ > > + J_TYPE(instr, code); > > + > > + tcg_gen_movi_tl(dc->cpu_R[R_PC], > > + (dc->pc & 0xF0000000) | (instr->imm26 * 4)); > > + > > + dc->is_jmp = DISAS_JUMP; > > +} > > + > > +/* > > + * I-Type instructions > > + */ > > + > > +/* rB <- 0x000000 : Mem8[rA + @(IMM16)] */ > > +static void ldbu(DisasContext *dc, uint32_t code) > > +{ > > + I_TYPE(instr, code); > > + > > + TCGv addr = tcg_temp_new(); > > + tcg_gen_movi_tl(addr, (int32_t)((int16_t)instr->imm16)); > > + tcg_gen_add_tl(addr, dc->cpu_R[instr->a], addr); > > Minor nitpick: it's better to write: > > tcg_gen_add_tl(addr, addr, dc->cpu_R[instr->a]); > > as the code generator on non-RISC hosts usually generate a slightly tiny > better code. > > > + > > + gen_load_u(dc, dc->cpu_R[instr->b], addr, 1); > > + > > + tcg_temp_free(addr); > > +} > > + > > +/* rB <- rA + IMM16 */ > > +static void addi(DisasContext *dc, uint32_t code) > > +{ > > + I_TYPE(instr, code); > > + > > + TCGv imm = tcg_temp_new(); > > + tcg_gen_movi_tl(imm, (int32_t)((int16_t)instr->imm16)); > > + > > + tcg_gen_add_tl(dc->cpu_R[instr->b], dc->cpu_R[instr->a], imm); > > + > > You can also call tcg_gen_addi_tl() instead, so that you don't need > to load the constant by yourself. As a bonus the immediate = 0 case > is optimized earlier in the code generation. > > > + tcg_temp_free(imm); > > +} > > + > > +/* Mem8[rA + @(IMM16)] <- rB(7..0) */ > > +static void stb(DisasContext *dc, uint32_t code) > > +{ > > + I_TYPE(instr, code); > > + > > + TCGv addr = tcg_temp_new(); > > + tcg_gen_movi_tl(addr, (int32_t)((int16_t)instr->imm16)); > > + tcg_gen_add_tl(addr, dc->cpu_R[instr->a], addr); > > Ditto > > > > +/* rC <- rA rotated left IMM5 bit positions */ > > +static void roli(DisasContext *dc, uint32_t code) > > +{ > > + R_TYPE(instr, code); > > + > > + TCGv t0 = tcg_temp_new(); > > + > > + tcg_gen_shri_tl(t0, dc->cpu_R[instr->a], 32 - instr->imm5); > > + tcg_gen_shli_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->a], > instr->imm5); > > + tcg_gen_or_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->c], t0); > > + > > Looks like you want to use tcg_gen_rotli, which is optimized on some > hosts (like x86). > > > + tcg_temp_free(t0); > > +} > > + > > +/* rC <- rA rotated left rB(4..0) bit positions */ > > +static void rol(DisasContext *dc, uint32_t code) > > +{ > > + R_TYPE(instr, code); > > + > > + TCGv t0 = tcg_temp_new(); > > + TCGv t1 = tcg_temp_new(); > > + > > + tcg_gen_andi_tl(t0, dc->cpu_R[instr->b], 31); > > + tcg_gen_movi_tl(t1, 32); > > + tcg_gen_sub_tl(t1, t1, t0); > > + tcg_gen_shri_tl(t1, dc->cpu_R[instr->a], t1); > > + tcg_gen_shli_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->a], t0); > > + tcg_gen_or_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->c], t1); > > + > > Same, tcg_gen_rotl exists. > > > -- > Aurelien Jarno GPG: 1024D/F1BCDB73 > aurel...@aurel32.net http://www.aurel32.net >