Update of /cvsroot/arcem/arcem/jit In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv26947/jit
Added Files: Tag: jit codeblocks.c codeblocks.h codegen.h decoder.c decoder.h dirtyranges.c dirtyranges.h emuinterf.c emuinterf.h emuinterf2.h jit.c jit.h jitpage.c jitpage.h jitstate.h jitstate2.h memattr.c memattr.h metrics.c metrics.h regalloc.c regalloc.h Log Message: WIP ARM-on-ARM JIT engine This is the beginnings of an ARM-on-ARM JIT engine, designed to be used by emulators like ArcEm and RPCEmu Main functionality issues to resolve: * Currently it's only functional for RISC OS hosts. But it should be fairly straightforward to get it working on other host OSes * Not all instructions are supported yet; unsupported instructions will be interpreted * The simplified interpreter loop which the JIT is invoked from doesn't implement the instruction prefetch pipeline; this will eventually need fixing (e.g. make the loop smart enough to stay in interpreter mode until the prefetched instructions match what's in memory, i.e. the CPU has left the self-modifying code sequence) * The JIT will update the cycle counter but won't actually trigger any events until the end of the JIT code block is reached; this may cause issues with some software * However the biggest problem is likely to be that the single-pass code generation results in sub-optimal handling of complex instructions like LDR/STR. So future development is likely to focus on experimenting with more complex code generation techniques, e.g. 
compiler-style code graphs
--- NEW FILE: codegen.h ---
#ifndef CODEGEN_HEADER
#define CODEGEN_HEADER

#include "emuinterf.h"
#include <assert.h>

/* Helpers that build single 32-bit host ARM instruction words.
   Each one ORs the caller-supplied condition bits (cc) with a fixed opcode
   pattern and the register/immediate fields. Nothing is written to memory
   here; callers pass the result to the code block writers. */

/* Load word with immediate offset: 0x05900000 is LDR Rd,[Rn,#+imm12].
   basereg goes in bits 16-19, hostreg in bits 12-15.
   Asserts that offset fits the 12-bit immediate field. */
static inline uint32_t JITCodeGen_LoadImm(uint32_t cc, uint32_t hostreg, uint32_t basereg, uint32_t offset)
{
  assert(offset < 4096);
  return cc | 0x05900000 | (basereg << 16) | (hostreg << 12) | offset;
}

/* Load word with register offset: same as LoadImm but with bit 25 set
   (0x07900000), so the low 12 bits are a register operand rather than an
   immediate.
   NOTE(review): the assert accepts any 12-bit value, not just a register
   number - presumably so a shift specifier can be passed along with Rm;
   confirm against callers. */
static inline uint32_t JITCodeGen_Load_Rm(uint32_t cc, uint32_t hostreg, uint32_t basereg, uint32_t Rm)
{
  assert(Rm < 4096);
  return cc | 0x07900000 | (basereg << 16) | (hostreg << 12) | Rm;
}

/* Store word with immediate offset: as LoadImm but with the load bit
   (bit 20) clear (0x05800000 = STR Rd,[Rn,#+imm12]). */
static inline uint32_t JITCodeGen_StoreImm(uint32_t cc, uint32_t hostreg, uint32_t basereg, uint32_t offset)
{
  assert(offset < 4096);
  return cc | 0x05800000 | (basereg << 16) | (hostreg << 12) | offset;
}

/* Convert a 32-bit constant into the 12-bit "rotated 8-bit immediate" field
   used by data processing instructions (rotate count in bits 8-11, value in
   bits 0-7).
   val is rotated left two bits at a time, bumping the rotate field by one
   (ret += 256) each step, until it fits in 8 bits with a non-zero bit in its
   bottom two positions. Zero is returned as zero.
   The assert fires after 16 rotations, i.e. when val cannot be expressed as a
   rotated 8-bit value; with asserts compiled out the loop would not
   terminate for such a value. */
static inline uint32_t JITCodeGen_EncodeImm12(uint32_t val)
{
  uint32_t ret = 0;
  if (val) {
    while (((val > 255) || !(val & 3))) {
      val = (val << 2) | (val >> 30);
      ret+=256;
      assert(ret != (16<<8));
    }
  }
  return ret | val;
}

/* Data processing, immediate operand (bit 25 set), destination only
   (Rn field left as zero). 'op' supplies the opcode bits. */
static inline uint32_t JITCodeGen_DataProc_Rd_Imm(uint32_t cc, uint32_t op, uint32_t Rd, int imm)
{
  return cc | (1<<25) | op | (Rd<<12) | JITCodeGen_EncodeImm12(imm);
}

/* Data processing, immediate operand, with both Rd and Rn. */
static inline uint32_t JITCodeGen_DataProc_Rd_Rn_Imm(uint32_t cc, uint32_t op, uint32_t Rd, uint32_t Rn, int imm)
{
  return cc | (1<<25) | op | (Rn<<16) | (Rd<<12) | JITCodeGen_EncodeImm12(imm);
}

/* Data processing, immediate operand, Rn only: bit 20 (the S bit) is set and
   no destination is encoded, i.e. the form used by compare/test style ops. */
static inline uint32_t JITCodeGen_DataProc_Rn_Imm(uint32_t cc, uint32_t op, uint32_t Rn, int imm)
{
  return cc | (1<<25) | op | (1<<20) | (Rn<<16) | JITCodeGen_EncodeImm12(imm);
}

/* Data processing, register operand, Rn only, with bit 20 set
   (register-operand counterpart of DataProc_Rn_Imm). */
static inline uint32_t JITCodeGen_DataProc_Rn_Rm(uint32_t cc, uint32_t op, uint32_t Rn, uint32_t Rm)
{
  return cc | op | (1<<20) | (Rn<<16) | Rm;
}

/* Data processing, register operand, destination only (e.g. MOV Rd,Rm). */
static inline uint32_t JITCodeGen_DataProc_Rd_Rm(uint32_t cc, uint32_t op, uint32_t Rd, uint32_t Rm)
{
  return cc | op | (Rd<<12) | Rm;
}

/* Data processing, register operand, with both Rd and Rn. */
static inline uint32_t JITCodeGen_DataProc_Rd_Rn_Rm(uint32_t cc, uint32_t op, uint32_t Rd, uint32_t Rn, uint32_t Rm)
{
  return cc | op | (Rn<<16) | (Rd<<12) | Rm;
}

static inline uint32_t
JITCodeGen_LoadNZCV(uint32_t cc, int hostreg) { /* MSR CPSR_f,hostreg */ return cc | 0x0128f000 | hostreg | cc; } static inline uint32_t JITCodeGen_SavePSR(uint32_t cc, int hostreg) { /* MRS hostreg,CPSR */ return cc | 0x010f0000 | (hostreg << 12) | cc; } static inline uint32_t JITCodeGen_Branch(uint32_t cc, int32_t offset) { return cc | 0x0a000000 | (((offset-8)>>2) & 0xffffff); } #endif --- NEW FILE: dirtyranges.h --- #ifndef DIRTYRANGES_HEADER #define DIRTYRANGES_HEADER #include "emuinterf.h" typedef struct { uintptr_t start; uintptr_t end; } DirtyRange; extern DirtyRange *DirtyRanges_Claim(uintptr_t addr); extern void DirtyRanges_Flush(void); #endif --- NEW FILE: codeblocks.h --- #ifndef CODEBLOCKS_HEADER #define CODEBLOCKS_HEADER #include <stdio.h> #include "dirtyranges.h" #include "jitpage.h" /* ForwardCodeBlock writes code going forwards, BackwardCodeBlock writes code going backwards */ typedef struct { DirtyRange *dirty; uintptr_t nextinstr; /* Address next instruction will be written to */ uintptr_t data_start; /* Start of data section, inclusive */ uintptr_t data_end; /* End of data section, exclusive */ } ForwardCodeBlock, BackwardCodeBlock; /* Get next ID that will be allocated; marks start of a code generation pass */ extern int CodeBlock_NextID(void); /* Claim the ID that was returned by NextID; marks end of a code generation pass */ extern void CodeBlock_ClaimID(int id, JITPage *page); /* Mark the chain starting with 'ID' as invalid */ extern void CodeBlock_InvalidateID(int id); extern void ForwardCodeBlock_New(ForwardCodeBlock *out, bool chain); extern void BackwardCodeBlock_New(BackwardCodeBlock *out, bool chain); static inline uintptr_t ForwardCodeBlock_WriteCode(ForwardCodeBlock *block, uint32_t word) { uint32_t *ptr = (uint32_t *) block->nextinstr; #ifdef JIT_DEBUG fprintf(stderr,"%08x: %08x\n",ptr,word); #endif *ptr++ = word; block->dirty->end = block->nextinstr = (uintptr_t) ptr; if (block->data_start == (uintptr_t) (ptr+1)) { /* Ran out of 
space in this block, generate branch to new block */ ForwardCodeBlock_New(block, true); } return (uintptr_t) (ptr-1); } static inline uintptr_t ForwardCodeBlock_WriteData(ForwardCodeBlock *block, uint32_t data) { /* TODO reuse existing values */ uint32_t *ptr = (uint32_t *) block->data_start; *(--ptr) = data; block->data_start = (uintptr_t) ptr; if ((uintptr_t) ptr == block->nextinstr+4) { /* Ran out of space in this block, generate branch to new block */ ForwardCodeBlock_New(block, true); } return (uintptr_t) ptr; } static inline uintptr_t BackwardCodeBlock_WriteCode(BackwardCodeBlock *block, uint32_t word) { uint32_t *ptr = (uint32_t *) block->nextinstr; block->dirty->start = (uintptr_t) ptr; *ptr-- = word; block->nextinstr = (uintptr_t) ptr; if (block->data_end == (uintptr_t) ptr) { /* Ran out of space in this block, generate branch from new block */ BackwardCodeBlock_New(block, true); } return (uintptr_t) (ptr+1); } static inline uintptr_t BackwardCodeBlock_WriteData(BackwardCodeBlock *block, uint32_t data) { /* TODO reuse existing values */ uint32_t *ptr = (uint32_t *) block->data_end; *(++ptr) = data; block->data_end = (uintptr_t) ptr; if ((uintptr_t) ptr == block->nextinstr) { /* Ran out of space in this block, generate branch from new block */ BackwardCodeBlock_New(block, true); } return (uintptr_t) (ptr-1); } #endif --- NEW FILE: codeblocks.c --- #include "codeblocks.h" #include "emuinterf.h" #include "jitstate2.h" #include <assert.h> #include <stdio.h> typedef struct { JITPage *owner; } codeblock_header; #define CODEBLOCK_SIZE (512-sizeof(codeblock_header)) /* Size in bytes */ #define CODEBLOCK_COUNT 2048 typedef struct { codeblock_header header; uint8_t data[CODEBLOCK_SIZE]; } codeblock; static codeblock codeblocks[CODEBLOCK_COUNT]; static int rr_next; static codeblock *codeblock_claim(const JITState *jit) { codeblock *block = &codeblocks[rr_next++]; if (rr_next == CODEBLOCK_COUNT) { rr_next = 0; } if (block->header.owner) { 
JITPage_ForgetCode(jit,block->header.owner); } return block; } static void writebranch(uintptr_t src, uintptr_t dest) { uint32_t offset = dest-8-src; *((uint32_t *) src) = ((offset>>2) & 0xffffff) | 0xEA000000; } int CodeBlock_NextID(void) { return rr_next; } void CodeBlock_ClaimID(int id, JITPage *page) { codeblocks[id].header.owner = page; } void CodeBlock_InvalidateID(int id) { if (id != -1) { codeblocks[id].header.owner = NULL; } } void ForwardCodeBlock_New(ForwardCodeBlock *out, bool chain) { const JITState *jit = JIT_GetState(JITEmuInterf_GetState()); codeblock *block = codeblock_claim(jit); #ifdef JIT_DEBUG fprintf(stderr,"ForwardCodeBlock_New: chain %d -> %08x\n",chain,block->data); #endif /* If we're chaining blocks, insert a branch at the end of 'out' */ if (chain) { uintptr_t loc = out->nextinstr; writebranch(loc, (uintptr_t) block->data); out->dirty->end = loc+4; } out->dirty = DirtyRanges_Claim((uintptr_t) block->data); out->nextinstr = (uintptr_t) block->data; out->data_start = out->data_end = (uintptr_t) (block->data + CODEBLOCK_SIZE); } void BackwardCodeBlock_New(BackwardCodeBlock *out, bool chain) { const JITState *jit = JIT_GetState(JITEmuInterf_GetState()); codeblock *block = codeblock_claim(jit); /* If we're chaining blocks, insert a branch at the end of 'block' */ if (chain) { writebranch((uintptr_t) block->data, out->nextinstr+4); } out->dirty = DirtyRanges_Claim((uintptr_t) (block->data + CODEBLOCK_SIZE)); if (chain) { out->dirty->start -= 4; } out->nextinstr = out->dirty->start - 4; out->data_start = out->data_end = (uintptr_t) block->data; } --- NEW FILE: dirtyranges.c --- #include "dirtyranges.h" #include "metrics.h" #include <assert.h> #include <stdio.h> #ifdef __riscos__ #include <kernel.h> #include <swis.h> #endif #define MAX_DIRTY_RANGES 256 typedef struct { DirtyRange ranges[MAX_DIRTY_RANGES]; int index; } DirtyRanges; static DirtyRanges dirty; DirtyRange *DirtyRanges_Claim(uintptr_t addr) { assert(dirty.index < MAX_DIRTY_RANGES); 
DirtyRange *r = &dirty.ranges[dirty.index++];
  /* New ranges start out empty; the code block writers grow them */
  r->start = r->end = addr;
  return r;
}

/* Release every claimed dirty range, most recent first.
   On RISC OS each non-empty range is passed to OS_SynchroniseCodeAreas
   (the end address passed is that of the last word, hence end-4).
   With DEBUG_JIT_METRICS the number of words is added to
   jitmetrics.instructions_out; with DEBUG_JIT_DUMP each word is
   disassembled to stderr.
   NOTE(review): the DEBUG_JIT_DUMP path calls _swix outside the
   __riscos__ guard and prints pointers with %08x. */
void DirtyRanges_Flush(void)
{
#ifdef DEBUG_JIT_DUMP
  uint32_t *addr;
  fprintf(stderr,"JIT:\n");
#endif
  /* TODO optimise (can IMB_List be called in user mode?) */
  while (dirty.index > 0) {
    DirtyRange *r = &dirty.ranges[--dirty.index];
#ifdef DEBUG_JIT_METRICS
    jitmetrics.instructions_out += (r->end - r->start)>>2;
#endif
#ifdef DEBUG_JIT_DUMP
    for(addr=(uint32_t *)r->start;addr!=(uint32_t *)r->end;addr++) {
      char *str;
      _swix(Debugger_Disassemble,_INR(0,1)|_OUT(1),*addr,addr,&str);
      fprintf(stderr,"%08x %08x %s\n",addr,*addr,str);
    }
#endif
#ifdef __riscos__
    if (r->start != r->end) {
      _swix(OS_SynchroniseCodeAreas,_INR(0,2),1,r->start,r->end-4);
    }
#endif
  }
}
--- NEW FILE: memattr.h ---
#ifndef MEMATTR_HEADER
#define MEMATTR_HEADER

#include "jitstate.h"
#include <string.h>

/* Per-word memory attribute flags: one byte of flags is kept for every
   4 bytes of memory (addresses are shifted right by 2 to index the table) */
#define JIT_MEMFLAG_CODE 1
#define JIT_MEMFLAG_ENTRYPOINT 2

/* Allocate the flag table and set up jit->addr2flags */
extern void MemAttr_Init(JITState *jit,uint32_t romramchunksize);

/* Return a pointer to the flag byte for the word containing 'phy' */
static inline uint8_t *MemAttr_Get(const JITState *jit,void *phy)
{
  return (uint8_t *) (jit->addr2flags + (((uintptr_t) phy)>>2));
}

static inline void MemAttr_SetCodeFlag(const JITState *jit,void *phy)
{
  *MemAttr_Get(jit,phy) |= JIT_MEMFLAG_CODE;
}

static inline void MemAttr_SetEntryPointFlag(const JITState *jit,void *phy)
{
  *MemAttr_Get(jit,phy) |= JIT_MEMFLAG_ENTRYPOINT;
}

/* Clear all flags for the word containing 'phy' */
static inline void MemAttr_ClearFlags(const JITState *jit,void *phy)
{
  *MemAttr_Get(jit,phy) = 0;
}

/* Clear all flags for the words in [phy_begin,phy_end) */
static inline void MemAttr_ClearFlagsRange(const JITState *jit,void *phy_begin, void *phy_end)
{
  uint8_t *begin = MemAttr_Get(jit,phy_begin);
  uint8_t *end = MemAttr_Get(jit,phy_end);
  memset(begin, 0, end-begin);
}

static inline bool MemAttr_GetCodeFlag(const JITState *jit,void *phy)
{
  return (*MemAttr_Get(jit,phy)) & JIT_MEMFLAG_CODE;
}

static inline bool MemAttr_GetEntryPointFlag(const JITState *jit,void *phy)
{
  return (*MemAttr_Get(jit,phy)) & JIT_MEMFLAG_ENTRYPOINT;
}

#endif
--- NEW FILE: memattr.c ---
#include <stdlib.h>
#include "memattr.h"

/* Allocate one zeroed flag byte per word of the ROM/RAM chunk and compute
   addr2flags so that addr2flags + (addr>>2) lands in the table.
   Reads jit->romramchunk, so that must already be set.
   NOTE(review): the calloc result is not checked. */
void MemAttr_Init(JITState *jit,uint32_t romramchunksize)
{
  jit->memflags = calloc(romramchunksize>>2,1);
  jit->addr2flags = ((uintptr_t) jit->memflags)-(jit->romramchunk>>2);
}
--- NEW FILE: jitstate.h ---
#ifndef JITSTATE_HEADER
#define JITSTATE_HEADER

#include "emuinterf.h"

typedef enum {
  JITResult_Interpret, /* Interpret the instruction at PC-8 */
  JITResult_Normal, /* Continue normal execution (try JITing the instruction at the PC-8) */
} JITResult;

/* Signature of a generated (or generator) function for one word of memory */
typedef JITResult (*JITFunc)(JITEmuState *state,void *addr);

typedef struct JITPage JITPage;

/* Private JIT state struct */
typedef struct {
  uintptr_t addr2func; /* Offset to apply to address pointers to convert to func pointers */
  uintptr_t addr2flags; /* Offset to apply to convert (shifted) address pointers to memory flag pointers */
  JITFunc *phy2func;
  uint8_t *memflags;
  JITPage *pages;
  uintptr_t romramchunk;
#ifdef DEBUG_JIT_METRICS_EXEC
  uint32_t exec_count;
#endif
} JITState;

#endif
--- NEW FILE: regalloc.c ---
#include <limits.h>
#include <assert.h>
#include <string.h>
#include "regalloc.h"
#include "emuinterf2.h"
#include "decoder.h"

/* Reset 'ra' to its initial state: nothing mapped except the state pointer,
   which lives in host R0 and is marked as required. */
void JITRegAlloc_Init(JITRegAlloc *ra, ForwardCodeBlock *block)
{
  int i;
  memset(ra, 0, sizeof(JITRegAlloc));
  for (i=0;i<JIT_H_REG_NUM;i++) {
    ra->host_regs[i].emureg = JIT_E_REG_NONE;
  }
  for (i=0;i<JIT_E_REG_NUM;i++) {
    ra->emu_regs[i].hostreg = JIT_H_REG_NONE;
  }
  ra->free_host_regs = (1<<12) + 0xe; /* r1-r3 and r12; r0 is not free as it's initially used for state ptr */
  ra->callee_save = 0x4ff0; /* r4-r11, r14 */
  ra->block = block;
  /* Init the state reg */
  ra->emu_regs[JIT_E_StateReg].hostreg = JIT_H_R0;
  ra->host_regs[JIT_H_R0].emureg = JIT_E_StateReg;
  ra->host_regs[JIT_H_R0].required = true;
}

void JITRegAlloc_Copy(JITRegAlloc *dest, JITRegAlloc *src, ForwardCodeBlock *block)
{
  /* Forget any callee-save instr, for both src and dest
     This prevents the two going out-of-sync if either src or dest need to allocate new callee-save regs */
  src->callee_save_instr = NULL;
  memcpy(dest, src,
sizeof(JITRegAlloc));
  /* dest writes its code to the caller-supplied block */
  dest->block = block;
}

/* Start a temporary split: 'child' becomes a copy of 'parent' (sharing the
   same code block and callee-save instruction pointer) after all of the
   parent's dirty registers have been written back. */
void JITRegAlloc_Fork(JITRegAlloc *parent, JITRegAlloc *child)
{
  /* Force write-back of any dirty regs (we can't guarantee they'll still be mapped when we join) */
  JITRegAlloc_WriteBackDirty(parent);
  /* Unlike Copy, we can retain the callee-save instr */
  memcpy(child, parent, sizeof(JITRegAlloc));
}

/* End a temporary split started with Fork, leaving 'parent' as the only
   live allocator. 'child' is reset (via CalleeRestore) as a side effect. */
void JITRegAlloc_Join(JITRegAlloc *parent, JITRegAlloc *child)
{
  int i;
  /* Now resolve the register mappings; if host reg has same emu reg for both paths, merge dirty flags
     Else force any writeback */
  for(i=0;i<JIT_H_REG_NUM;i++) {
    if (parent->host_regs[i].emureg == child->host_regs[i].emureg) {
      parent->host_regs[i].dirty |= child->host_regs[i].dirty;
    } else {
      JITRegAlloc_FlushHostReg(child, (JITHostReg) i);
      JITRegAlloc_FlushHostReg(parent, (JITHostReg) i); /* Shouldn't trigger a store */
    }
  }
  if (parent->callee_save_instr) {
    /* Pull apart this instruction to see if there are any saved registers that were added by the child
       (it's safest for us to work it out this way, rather than examine the child) */
    uint32_t saved = 0;
    if (((*parent->callee_save_instr) & 0x0f000000) == 0x05000000) {
      /* Single STR: the saved register is the Rd field */
      saved = (1<<(((*parent->callee_save_instr) >> 12) & 15));
    } else {
      /* STM: the saved registers are the register list */
      saved = (*parent->callee_save_instr) & 0xffff;
    }
    /* Keep only those the parent didn't already know were saved, and move
       them from "can be callee-saved" to "saved and free" */
    saved &= parent->callee_save;
    parent->callee_save &= ~saved;
    parent->callee_saved |= saved;
    parent->free_host_regs |= saved;
  }
  /* If there are any registers the child pushed which we can't locate the save instr for, get the child to restore them itself */
  child->callee_saved &= ~parent->callee_saved;
  JITRegAlloc_CalleeRestore(child, false);
  /* Update block reference */
  parent->block = child->block;
  /* We must forget any callee-save instr, since any update we make to it will put it out of sync with any restore the child performed TODO - Have a flag in JITRegAlloc for whether the child has restored? 
*/
  parent->callee_save_instr = NULL;
}

/* Emit (or extend) the instruction that pushes callee-save host register
   'hr' onto the host stack, then mark hr as saved and free for use.
   The first save is a single STR; later saves rewrite that same instruction
   in place into an STM and add hr to its register list. */
void JITRegAlloc_CalleeSave(JITRegAlloc *ra, JITHostReg hr)
{
  /* XXX should be in EmuInterf */
  ra->callee_saved |= (1<<hr);
  if (!ra->callee_save_instr) {
    ra->callee_save_instr = (uint32_t *) ForwardCodeBlock_WriteCode(ra->block, 0xe52d0004 | (hr<<12)); /* STR Rn,[R13,#-4]! */
  } else {
    if (((*ra->callee_save_instr) & 0x0f000000) == 0x05000000) { /* STR? */
      *ra->callee_save_instr = 0xe92d0000 | (1<<(((*ra->callee_save_instr) >> 12) & 15)); /* Convert to STM */
    }
    *ra->callee_save_instr |= (1<<hr); /* n.b. can't use callee_saved since we might be chaining STR/STM */
  }
  /* Update register lists */
  ra->free_host_regs |= 1<<hr;
  ra->callee_save &= ~(1<<hr);
}

/* Make emulated register 'r' resident in a host register and return that
   host register, adding 'required' to its lock count.
   hostreg selects a specific host register, or JIT_H_REG_NONE to let the
   allocator choose. If r is already in the requested place nothing is
   emitted; if it is resident elsewhere it is moved; otherwise it is loaded
   from the emulator state. No move/load is emitted when cc is JIT_NV. */
JITHostReg JITRegAlloc_MapReg(JITRegAlloc *ra, JITEmuReg r, int required, JITHostReg hostreg, uint32_t cc)
{
  if (hostreg == JIT_H_REG_NONE) {
    if (ra->emu_regs[r].hostreg == JIT_H_REG_NONE) {
      hostreg = JITRegAlloc_GetFreeHostReg(ra);
    } else {
      hostreg = ra->emu_regs[r].hostreg;
      ra->host_regs[hostreg].required += required;
      return hostreg; /* already resident */
    }
  }
  if (ra->emu_regs[r].hostreg == hostreg) {
    ra->host_regs[hostreg].required += required;
    return hostreg;
  }
  /* Evict whatever currently occupies the target host register */
  JITRegAlloc_FlushHostReg(ra, hostreg);
  if (ra->emu_regs[r].hostreg != JIT_H_REG_NONE) {
    /* r is resident in a different host register: move it across */
    if (cc != JIT_NV) {
      ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rm(cc, JIT_MOV, hostreg, ra->emu_regs[r].hostreg)); /* needs to deal with move to/from PSR? 
*/
    }
    /* Transfer the state (including dirty flag and lock count) to the new
       host register and release the old one */
    ra->host_regs[hostreg] = ra->host_regs[ra->emu_regs[r].hostreg];
    ra->free_host_regs |= 1 << ra->emu_regs[r].hostreg;
    ra->host_regs[ra->emu_regs[r].hostreg] = (JITHostRegState) { JIT_E_REG_NONE, false, 0 };
  } else if (cc != JIT_NV) {
    /* r isn't resident: load it via the state pointer register.
       Emulated registers other than those handled below (e.g. the temps)
       have no backing store, so nothing is loaded for them. */
    /* XXX make general */
    if (r < JIT_E_CycleCount) {
      ForwardCodeBlock_WriteCode(ra->block, JITEmuInterf2_LoadReg(cc, hostreg, ra->emu_regs[JIT_E_StateReg].hostreg, r));
    } else if (r == JIT_E_CycleCount) {
      ForwardCodeBlock_WriteCode(ra->block, JITEmuInterf2_LoadCycleCount(cc, hostreg, ra->emu_regs[JIT_E_StateReg].hostreg));
    }
#ifdef DEBUG_JIT_METRICS_EXEC
    else if (r == JIT_E_Exec) {
      ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_LoadImm(cc,hostreg,ra->emu_regs[JIT_E_StateReg].hostreg,offsetof(ARMul_State,jit)+offsetof(JITState,exec_count)));
    }
#endif
  }
  /* Record the new mapping */
  ra->emu_regs[r].hostreg = hostreg;
  ra->host_regs[hostreg].emureg = r;
  ra->host_regs[hostreg].required += required;
  ra->free_host_regs &= ~(1<<hostreg);
  return hostreg;
}

/* Remove emulated register 'r' from its host register, writing it back
   first if dirty, and mark the host register as free.
   Does nothing if r is JIT_E_REG_NONE or isn't resident.
   Asserts that the host register isn't currently locked (required). */
void JITRegAlloc_UnmapReg(JITRegAlloc *ra, JITEmuReg r)
{
  if (r == JIT_E_REG_NONE) {
    return;
  }
  JITHostReg hr = ra->emu_regs[r].hostreg;
  if (hr == JIT_H_REG_NONE) {
    return;
  }
  assert(!ra->host_regs[hr].required);
  JITRegAlloc_WriteBack(ra, hr);
  ra->free_host_regs |= 1 << hr;
  ra->host_regs[ra->emu_regs[r].hostreg] = (JITHostRegState) { JIT_E_REG_NONE, false, 0 };
  ra->emu_regs[r] = (JITEmuRegState) { JIT_H_REG_NONE };
}

/* Pick a host register to use for a new mapping, scanning round-robin from
   the register after ra->next_idx and choosing the cheapest candidate. */
JITHostReg JITRegAlloc_GetFreeHostReg(JITRegAlloc *ra)
{
  int start = ra->next_idx;
  /* weight the registers by the associated cost: 1. any from free_host_regs 2. any where !required && !dirty -> later on, might want to associate an explicit cost with reloading/recalculating values? 3. highest numbered reg from callee_save (must be highest numbered so we can have multiple STR/STM but only one LDR/LDM) -> later on, might want to give these explicit costs as well? ldm/stm can transfer quickly, need to be able to represent if the next register to claim is going to be quick or slow 4. 
any where !required && dirty -> later on, might want to associate an explicit cost; if the register is no longer needed then writing it back will be cheaper than taking from the callee-save list could do this in one pass, tracking the cost associated with using a given register */
  int best_cost = INT_MAX;
  int best_idx = 0;
/* Remember ra->next_idx as the best choice if 'cost' beats the best so far.
   (The macro is not #undef'd after use.) */
#define CANDIDATE(cost) if (best_cost > cost) { best_idx = ra->next_idx; best_cost = cost; }
  do {
    /* Only host registers 0-15 are considered */
    ra->next_idx = (ra->next_idx + 1) & 15;
    if (ra->free_host_regs & (1<<ra->next_idx)) {
      CANDIDATE(1) /* rule 1 */
      break; /* always going to win, so stop here */
    } else if (ra->callee_save & (1<<ra->next_idx)) {
      /* Cost falls as the register number rises, so the highest numbered
         callee-save register wins among these */
      CANDIDATE(3 + 16 - ra->next_idx) /* rule 3 */
    } else if ((ra->host_regs[ra->next_idx].emureg != JIT_E_REG_NONE) && !ra->host_regs[ra->next_idx].required) {
      if (ra->host_regs[ra->next_idx].dirty) {
        CANDIDATE(4 + 20) /* rule 4 */
      } else {
        CANDIDATE(2)
        if (!ra->free_host_regs) {
          break; /* not going to get any better than this */
        }
      }
    }
  } while(start != ra->next_idx);
  /* Every register being locked is a caller error */
  assert(best_cost != INT_MAX);
  ra->next_idx = best_idx;
  if (ra->callee_save & (1<<best_idx)) {
    /* preserve contents of callee-save register */
    JITRegAlloc_CalleeSave(ra, best_idx); /* assume this will track which registers need restoring, and will update ra->callee_save & ra->free_host_regs to indicate which register(s) have become available */
  } else if (!(ra->free_host_regs & (1<<best_idx))) {
    /* reclaim register that's already in use */
    JITRegAlloc_FlushHostReg(ra, best_idx);
  }
  return (JITHostReg) best_idx;
}

/* Flush every host register.
   NOTE(review): JITRegAlloc_Init leaves host R0 (the state pointer) marked
   as required, and UnmapReg asserts that the register isn't required, so
   this looks like it will assert unless the caller has unlocked R0 first -
   confirm intended usage. */
void JITRegAlloc_UnmapAll(JITRegAlloc *ra)
{
  int i;
  for (i=0; i<JIT_H_REG_NUM; i++) {
    JITRegAlloc_FlushHostReg(ra, i);
  }
}

/* If host register 'hr' holds a dirty value, emit a store of it back to the
   emulator state and clear the dirty flag. The mapping is left in place.
   Emulated registers with no backing store (anything other than R0-R15,
   the cycle count and, for metrics builds, the exec count) just have their
   dirty flag cleared. */
void JITRegAlloc_WriteBack(JITRegAlloc *ra, JITHostReg hr)
{
  JITEmuReg r;
  if ((hr == JIT_H_REG_NONE) || (!ra->host_regs[hr].dirty)) {
    return;
  }
  /* XXX make general */
  r = ra->host_regs[hr].emureg;
  if (r < JIT_E_CycleCount) {
    ForwardCodeBlock_WriteCode(ra->block, JITEmuInterf2_StoreReg(JIT_AL, hr, ra->emu_regs[JIT_E_StateReg].hostreg, r));
  } else if (r ==
JIT_E_CycleCount) {
    ForwardCodeBlock_WriteCode(ra->block, JITEmuInterf2_StoreCycleCount(JIT_AL, hr, ra->emu_regs[JIT_E_StateReg].hostreg));
  }
#ifdef DEBUG_JIT_METRICS_EXEC
  else if (r == JIT_E_Exec) {
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_StoreImm(JIT_AL,hr,ra->emu_regs[JIT_E_StateReg].hostreg,offsetof(ARMul_State,jit)+offsetof(JITState,exec_count)));
  }
#endif
  ra->host_regs[hr].dirty = false;
}

/* Write back every host register that isn't currently locked.
   (WriteBack itself ignores registers that aren't dirty.) */
void JITRegAlloc_WriteBackUnrequired(JITRegAlloc *ra)
{
  int i;
  for (i=0; i<JIT_H_REG_NUM; i++) {
    if (!ra->host_regs[i].required) {
      JITRegAlloc_WriteBack(ra, i);
    }
  }
}

/* Write back every dirty host register, locked or not. */
void JITRegAlloc_WriteBackDirty(JITRegAlloc *ra)
{
  int i;
  for (i=0; i<JIT_H_REG_NUM; i++) {
    if (ra->host_regs[i].dirty) {
      JITRegAlloc_WriteBack(ra, i);
    }
  }
}

/* Emit the code that undoes the callee-saves made so far and, if 'ret' is
   true, returns from the generated function. The allocator is then
   re-initialised (keeping its code block). */
void JITRegAlloc_CalleeRestore(JITRegAlloc *ra, bool ret)
{
  /* Write back everything */
  JITRegAlloc_WriteBackDirty(ra);
  /* Restore callee-save regs and/or return */
  if (ra->callee_saved) {
    if (ra->callee_saved == (1<<14)) {
      /* Only R14 was saved: a single post-indexed load will do */
      ForwardCodeBlock_WriteCode(ra->block, (ret ? 
0xe49df004 : 0xe49de004)); /* LDR PC or R14 */
    } else {
      /* 0xe8bd0000 = LDMIA R13!,{...} */
      uint32_t ldm = 0xe8bd0000 | ra->callee_saved;
      if (ret) {
        /* Toggle bits 14 and 15 of the register list, turning a saved R14
           into a load of PC.
           NOTE(review): this relies on R14 being in callee_saved whenever
           anything is; presumably guaranteed by GetFreeHostReg taking the
           highest numbered callee-save register first - confirm. */
        ldm ^= 3<<14;
      }
      ForwardCodeBlock_WriteCode(ra->block,ldm);
    }
  } else if (ret) {
    /* Nothing was saved: return with MOV PC,R14 */
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rm(JIT_AL,JIT_MOV,15,14));
  }
  /* Reset our state */
  JITRegAlloc_Init(ra, ra->block);
}
--- NEW FILE: emuinterf2.h ---
#ifndef JITEMUINTERF2_HEADER
#define JITEMUINTERF2_HEADER

#include <stddef.h>
#include "codegen.h"
#include "jit.h"
#include "../armdefs.h"
#include "regalloc.h" /* YUCK */
#include "decoder.h"

/* Extra emulator interfaces */

/* Build a load of emulated register 'emureg' from the ARMul_State pointed to
   by host register 'statereg' into 'hostreg' */
static inline uint32_t JITEmuInterf2_LoadReg(uint32_t cc, uint32_t hostreg, uint32_t statereg, uint32_t emureg)
{
  return JITCodeGen_LoadImm(cc,hostreg,statereg,offsetof(ARMul_State,Reg[emureg]));
}

/* Build a store of 'hostreg' to emulated register 'emureg' in the state */
static inline uint32_t JITEmuInterf2_StoreReg(uint32_t cc, uint32_t hostreg, uint32_t statereg, uint32_t emureg)
{
  return JITCodeGen_StoreImm(cc,hostreg,statereg,offsetof(ARMul_State,Reg[emureg]));
}

/* Build a load of the state's NumCycles field into 'hostreg' */
static inline uint32_t JITEmuInterf2_LoadCycleCount(uint32_t cc, uint32_t hostreg, uint32_t statereg)
{
  return JITCodeGen_LoadImm(cc,hostreg,statereg,offsetof(ARMul_State,NumCycles));
}

/* Build a store of 'hostreg' to the state's NumCycles field */
static inline uint32_t JITEmuInterf2_StoreCycleCount(uint32_t cc, uint32_t hostreg, uint32_t statereg)
{
  return JITCodeGen_StoreImm(cc,hostreg,statereg,offsetof(ARMul_State,NumCycles));
}

extern void JITEmuInterf2_WriteEpilogue(JITRegAlloc *ra, JITResult result);

extern void JITEmuInterf2_WriteLoad(JITRegAlloc *ra, JITHostReg Rd, JITHostReg Raddr, bool byte, uintptr_t abort);

extern void JITEmuInterf2_WriteLDM(JITRegAlloc *ra, JITHostReg Rn, JITHostReg Raddr, const Instruction *instr, uintptr_t abort);

#endif
--- NEW FILE: regalloc.h ---
#ifndef REGALLOC_HEADER
#define REGALLOC_HEADER

#include "emuinterf.h"
#include "codeblocks.h" /* YUCK */

/* Host (physical) registers known to the allocator */
typedef enum {
  JIT_H_R0 = 0, /* JIT_H_R1 ... 
JIT_H_R15 as expected */
  JIT_H_PC = 15,
  JIT_H_PSR = 16,
  JIT_H_REG_NUM,
  JIT_H_REG_NONE = -1
} JITHostReg;

/* Emulated registers, plus the pseudo-registers the code generator tracks
   through the allocator (cycle count, temporaries, state pointer) */
typedef enum {
  JIT_E_R0 = 0, /* JIT_E_R1 ... JIT_E_R15 as expected */
  JIT_E_PC = 15,
  JIT_E_CycleCount,
  JIT_E_Temp,
  JIT_E_Temp2,
  JIT_E_Temp3,
  JIT_E_Temp4,
  JIT_E_StateReg,
#ifdef DEBUG_JIT_METRICS_EXEC
  JIT_E_Exec,
#endif
  JIT_E_REG_NUM,
  JIT_E_REG_NONE = -1
} JITEmuReg;

/* Convert a plain register number to the corresponding enum value */
#define JIT_HR(x) ((JITHostReg) (JIT_H_R0 + (x)))
#define JIT_ER(x) ((JITEmuReg) (JIT_E_R0 + (x)))

typedef struct {
  JITHostReg hostreg; /* JIT_H_REG_NONE if not loaded */
} JITEmuRegState;

typedef struct {
  JITEmuReg emureg; /* JIT_E_REG_NONE if not in use */
  bool dirty; /* true if value needs writing back */
  uint8_t required; /* >0 if required */
} JITHostRegState;

typedef struct {
  JITEmuRegState emu_regs[JIT_E_REG_NUM]; /* Which host reg an emu reg maps to */
  JITHostRegState host_regs[JIT_H_REG_NUM]; /* Which emu reg a host reg maps to */
  uint32_t free_host_regs; /* Host registers which are free for immediate use */
  uint32_t callee_save; /* Host registers which can be made free for use by callee-save mechanism */
  int next_idx; /* Host register index the round-robin search in GetFreeHostReg continues from */
  uint32_t *callee_save_instr; /* STR/STM that implements the callee-save */
  uint32_t callee_saved; /* Which callee-save regs have been saved */
  ForwardCodeBlock *block; /* Code block to use for writing any instructions */
} JITRegAlloc;

/* Initialise a register allocator */
extern void JITRegAlloc_Init(JITRegAlloc *ra, ForwardCodeBlock *block);

/* Copy a register allocator, changing the associated code block
   Use when permanently splitting the path of execution */
extern void JITRegAlloc_Copy(JITRegAlloc *dest, JITRegAlloc *src, ForwardCodeBlock *block);

/* Copy a register allocator
   Use when temporarily splitting the path of execution
   Parent must not be used until joined with child */
extern void JITRegAlloc_Fork(JITRegAlloc *parent, JITRegAlloc *child);

/* Join the two paths so only parent remains */
extern void JITRegAlloc_Join(JITRegAlloc *parent, JITRegAlloc *child);

/* 
Trigger callee-saving of hr */
extern void JITRegAlloc_CalleeSave(JITRegAlloc *ra, JITHostReg hr);

/* Request that r is made available
   Specify hostreg of JIT_H_REG_NONE for automatic register assignment
   required should be 1 or 0
   cc specifies the condition under which the value should be loaded (JIT_NV if we always overwrite) */
extern JITHostReg JITRegAlloc_MapReg(JITRegAlloc *ra, JITEmuReg r, int required, JITHostReg hostreg, uint32_t cc);

/* Lock/unlock a register by incrementing/decrementing the required count
   'required' is added to the count (so pass -1 to unlock); 'dirty' is ORed
   into the dirty flag. Does nothing if hr is JIT_H_REG_NONE. */
static inline void JITRegAlloc_LockReg(JITRegAlloc *ra, JITHostReg hr, int required, bool dirty)
{
  if (hr != JIT_H_REG_NONE) {
    ra->host_regs[hr].required += required;
    ra->host_regs[hr].dirty |= dirty;
  }
}

/* Unmap a given emu register */
extern void JITRegAlloc_UnmapReg(JITRegAlloc *ra, JITEmuReg r);

/* Return a free host reg */
extern JITHostReg JITRegAlloc_GetFreeHostReg(JITRegAlloc *ra);

/* Unmap a given host register
   (a no-op if the host register has no emu register mapped to it) */
static inline void JITRegAlloc_FlushHostReg(JITRegAlloc *ra, JITHostReg hostreg)
{
  JITRegAlloc_UnmapReg(ra, ra->host_regs[hostreg].emureg);
}

/* Unmap all registers */
extern void JITRegAlloc_UnmapAll(JITRegAlloc *ra);

/* Lock and return an emu reg if it's present
   Returns JIT_H_REG_NONE (and locks nothing) if r isn't resident */
static inline JITHostReg JITRegAlloc_LockEmuReg(JITRegAlloc *ra, JITEmuReg r)
{
  JITHostReg hr = ra->emu_regs[r].hostreg;
  if (hr != JIT_H_REG_NONE) {
    ra->host_regs[hr].required++;
  }
  return hr;
}

/* Write back host reg if dirty (but keep resident) */
extern void JITRegAlloc_WriteBack(JITRegAlloc *ra, JITHostReg hr);

/* Write back all dirty, non-required regs (but keep resident) */
extern void JITRegAlloc_WriteBackUnrequired(JITRegAlloc *ra);

/* Write back all dirty regs (but keep resident) */
extern void JITRegAlloc_WriteBackDirty(JITRegAlloc *ra);

/* Restore callee-save regs, and/or return from function. Resets state. 
*/
extern void JITRegAlloc_CalleeRestore(JITRegAlloc *ra, bool ret);

#endif
--- NEW FILE: jit.c ---
#include <assert.h>
#include <stdlib.h>
#include "jit.h"
#include "jitpage.h"
#include "memattr.h"
#include "decoder.h"
#include "codeblocks.h"
#include "emuinterf2.h"
#include "metrics.h"
#include "regalloc.h"
#ifdef __riscos__
#include <kernel.h>
#include <swis.h>
#endif

/* Initialise the JIT state for a ROM/RAM chunk of romramchunksize bytes
   located at romramchunk.
   Allocates one function pointer per word of the chunk, all initially
   pointing at JIT_Generate, and sets addr2func so that adding it to a
   chunk address gives the matching phy2func entry. Then initialises the
   page and memory attribute tables.
   NOTE(review): the malloc result is used without a NULL check, and memset
   is used without including <string.h> here (memattr.h includes it). */
void JIT_Init(JITState *jit,uintptr_t romramchunk,uint32_t romramchunksize)
{
  int i;
  int count = romramchunksize/4;
  memset(jit,0,sizeof(JITState));
  jit->romramchunk = romramchunk;
  jit->phy2func = (JITFunc *) malloc(sizeof(JITFunc)*count);
  for (i=0;i<count;i++) {
    jit->phy2func[i] = &JIT_Generate;
  }
  jit->addr2func = ((uintptr_t) jit->phy2func)-romramchunk;
  JITPage_Init(jit,romramchunksize);
  MemAttr_Init(jit,romramchunksize);
}

/* Return true if the code generator can handle 'instr'; anything else is
   left for the interpreter. */
static inline bool JITable(const Instruction *instr)
{
  switch (instr->type) {
  case InstrType_NOP:
    return true;
  case InstrType_DataProc:
    /* Don't allow R15 as dest */
    if (Decoder_DataProc_UsesRd(instr) && (Decoder_Rd(instr) == 15)) {
      return false;
    }
    return true;
#ifndef DEBUG_JIT_TEST_EXEC
  case InstrType_Branch:
    return true;
#endif
  case InstrType_LDRSTR:
    /* Only load, non-T instructions which don't write R15 */
    if (Decoder_LDRSTR_WritebackFlag(instr) && (Decoder_Rn(instr) == 15)) {
      return false;
    }
    return (Decoder_LDRSTR_LoadFlag(instr) && (Decoder_Rd(instr) != 15) && !Decoder_LDRSTR_TFlag(instr));
  case InstrType_LDMSTM:
    /* Only load, non-hat, non-PC using
       (bit 15 of the instruction is R15 in the register list) */
    if (Decoder_LDMSTM_HatFlag(instr) || (Decoder_Rn(instr) == 15) || (instr->instr & 0x8000) || !Decoder_LDMSTM_LoadFlag(instr)) {
      return false;
    }
    return true;
  default:
    return false;
  }
}

/* Emulator state changes that have been accumulated by the code generator
   but not yet emitted as host code */
typedef struct {
  bool psr_loaded;
  bool psr_dirty; /* Host PSR flags need merging into the emulated PC */
  int cycles; /* Cycles still to be added to the cycle counter */
  int instrs; /* Instructions still to be added to the PC */
} CodeGenState;

/* Emit code to bring the emulated cycle count, PC and PSR flags up to date
   with 'cgstate', then clear the pending state. */
static void codegen_sync(JITRegAlloc *ra,CodeGenState *cgstate)
{
  if (cgstate->instrs || cgstate->cycles || cgstate->psr_dirty) {
    /* Lock anything we need that's already resident, so the write-back
       below can't pick it */
    JITHostReg cyc = JITRegAlloc_LockEmuReg(ra, JIT_E_CycleCount);
    JITHostReg pc = JITRegAlloc_LockEmuReg(ra, JIT_E_PC);
JITHostReg temp = (cgstate->psr_dirty ? JITRegAlloc_LockEmuReg(ra, JIT_E_Temp) : JIT_H_REG_NONE);
#ifdef DEBUG_JIT_METRICS_EXEC
    JITHostReg exec = JITRegAlloc_LockEmuReg(ra, JIT_E_Exec);
#endif
    JITRegAlloc_WriteBackUnrequired(ra); /* Free up some regs */
    /* Map whatever wasn't already resident. The temp is mapped with JIT_NV
       since its old value is about to be overwritten. */
    if (cyc == JIT_H_REG_NONE) {
      cyc = JITRegAlloc_MapReg(ra, JIT_E_CycleCount, 1, JIT_H_REG_NONE, JIT_AL);
    }
    if (pc == JIT_H_REG_NONE) {
      pc = JITRegAlloc_MapReg(ra, JIT_E_PC, 1, JIT_H_REG_NONE, JIT_AL);
    }
    if (cgstate->psr_dirty && (temp == JIT_H_REG_NONE)) {
      temp = JITRegAlloc_MapReg(ra, JIT_E_Temp, 1, JIT_H_REG_NONE, JIT_NV);
    }
#ifdef DEBUG_JIT_METRICS_EXEC
    if (exec == JIT_H_REG_NONE) {
      exec = JITRegAlloc_MapReg(ra, JIT_E_Exec, 1, JIT_H_REG_NONE, JIT_AL);
    }
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_ADD,exec,exec,cgstate->instrs));
    JITRegAlloc_LockReg(ra, exec, -1, true);
#endif
    if (cgstate->psr_dirty) {
      /* Grab the host flags before the ADDs below (which don't set flags) */
      ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_SavePSR(JIT_AL,temp));
    }
    /* NOTE(review): both ADDs are emitted even when the amount is zero, and
       the amounts must be encodable as a rotated 8-bit immediate or
       JITCodeGen_EncodeImm12 will assert. */
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_ADD,cyc,cyc,cgstate->cycles));
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_ADD,pc,pc,cgstate->instrs<<2));
    if (cgstate->psr_dirty) {
      /* Replace the top four bits of the emulated PC with the top four bits
         of the saved host PSR */
      /* TODO prime candidate for BFI */
      ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_AND,temp,temp,0xF0000000));
      ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_BIC,pc,pc,0xF0000000));
      ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Rm(JIT_AL,JIT_ORR,pc,pc,temp));
    }
    /* Unlock, marking the cycle count and PC as modified */
    JITRegAlloc_LockReg(ra, cyc, -1, true);
    JITRegAlloc_LockReg(ra, pc, -1, true);
    JITRegAlloc_LockReg(ra, temp, -1, false);
    cgstate->psr_loaded = cgstate->psr_dirty = false;
    cgstate->cycles = cgstate->instrs = 0;
  }
}

/* As codegen_sync, but only brings the emulated PC up to date (and, if
   'psr' is true and the flags are dirty, the PSR flags held in it). The
   pending cycle count is left alone. */
static void codegen_sync_pc(JITRegAlloc *ra,CodeGenState *cgstate,bool psr)
{
  psr &= cgstate->psr_dirty;
  if (cgstate->instrs || psr) {
    JITHostReg pc = JITRegAlloc_LockEmuReg(ra, JIT_E_PC);
    JITHostReg temp = (psr ? 
JITRegAlloc_LockEmuReg(ra, JIT_E_Temp) : JIT_H_REG_NONE); #ifdef DEBUG_JIT_METRICS_EXEC JITHostReg exec = JITRegAlloc_LockEmuReg(ra, JIT_E_Exec); #endif if (pc == JIT_H_REG_NONE) { pc = JITRegAlloc_MapReg(ra, JIT_E_PC, 1, JIT_H_REG_NONE, JIT_AL); } if (psr && (temp == JIT_H_REG_NONE)) { temp = JITRegAlloc_MapReg(ra, JIT_E_Temp, 1, JIT_H_REG_NONE, JIT_NV); } #ifdef DEBUG_JIT_METRICS_EXEC if (exec == JIT_H_REG_NONE) { exec = JITRegAlloc_MapReg(ra, JIT_E_Exec, 1, JIT_H_REG_NONE, JIT_AL); } ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_ADD,exec,exec,cgstate->instrs)); JITRegAlloc_LockReg(ra, exec, -1, true); #endif if (psr) { ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_SavePSR(JIT_AL,temp)); } if (cgstate->instrs) { ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_ADD,pc,pc,cgstate->instrs<<2)); } if (psr) { /* TODO prime candidate for BFI */ ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_AND,temp,temp,0xF0000000)); ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_BIC,pc,pc,0xF0000000)); ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Rm(JIT_AL,JIT_ORR,pc,pc,temp)); cgstate->psr_dirty = false; } JITRegAlloc_LockReg(ra, pc, -1, true); JITRegAlloc_LockReg(ra, temp, -1, false); cgstate->instrs = 0; } } static void codegen_conditional_cycles(JITRegAlloc *ra,CodeGenState *cgstate,uint32_t cc,int count) { if (cc == JIT_NV) { return; } if (cc == JIT_AL) { cgstate->cycles += count; } else if (count) { JITHostReg cyc = JITRegAlloc_MapReg(ra, JIT_E_CycleCount, 1, JIT_H_REG_NONE, JIT_AL); ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(cc,JIT_ADD,cyc,cyc,cgstate->cycles)); JITRegAlloc_LockReg(ra, cyc, -1, true); } } static void codegen_bigadd(ForwardCodeBlock *block, JITHostReg Rd, JITHostReg Rn, int val) { int op = JIT_ADD; int shift = 0; if (val < 0) { op = JIT_SUB; val = -val; } else if (!val) { if (Rd != Rn) { 
ForwardCodeBlock_WriteCode(block, JITCodeGen_DataProc_Rd_Rm(JIT_AL, JIT_MOV, Rd, Rn)); } return; } while (val) { /* XX CLZ */ while (!((val >> shift) & 0x3)) { shift += 2; } ForwardCodeBlock_WriteCode(block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,op,Rd,Rn,val & (0xff<<shift))); Rn = Rd; val &= ~(0xff<<shift); shift += 8; } } static void codegen_handle_branch(JITRegAlloc *ra, CodeGenState *cgstate, const Instruction *instr) { /* Prep PC */ JITHostReg pc = JITRegAlloc_MapReg(ra, JIT_E_PC, 1, JIT_H_REG_NONE, JIT_AL); cgstate->cycles += 2; codegen_sync(ra,cgstate); if (Decoder_Branch_BLFlag(instr)) { /* Copy PC to R14 */ JITHostReg r14 = JITRegAlloc_MapReg(ra, JIT_ER(14), 1, JIT_H_REG_NONE, JIT_NV); ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_SUB,r14,pc,8)); JITRegAlloc_LockReg(ra, r14, -1, true); } /* Add branch offset */ int32_t offset = Decoder_Branch_Offset(instr)-4; if (offset) { /* To avoid needing a temp reg to deal with overflow, just rotate the PC + offset */ /* XXX temp reg could be better if e.g. 
PSR is already loaded */ offset <<= 6; ForwardCodeBlock_WriteCode(ra->block, 0xe1a00d60 + (pc*0x1001)); /* MOV pc,pc,ROR #26 */ codegen_bigadd(ra->block, pc, pc, offset); ForwardCodeBlock_WriteCode(ra->block, 0xe1a00360 + (pc*0x1001)); /* MOV pc,pc,ROR #6 */ } JITRegAlloc_LockReg(ra, pc, -1, true); /* Now exit */ JITEmuInterf2_WriteEpilogue(ra,JITResult_Normal); } static void codegen_handle_branch_cc(JITRegAlloc *ra1, const CodeGenState *cgstate1, ForwardCodeBlock *block, const Instruction *instr) { JITRegAlloc ra2; CodeGenState cgstate2; memcpy(&cgstate2,cgstate1,sizeof(CodeGenState)); JITRegAlloc_Copy(&ra2,ra1,block); codegen_handle_branch(&ra2,&cgstate2,instr); } static void codegen_handle_ldr(JITRegAlloc *ra,const CodeGenState *cgstate,const Instruction *instr, ForwardCodeBlock *block2) { JITHostReg Rn, Rd, Rm = JIT_H_REG_NONE, Raddr, Ralu, Roffset; /* Pre-lock registers */ Rn = JITRegAlloc_LockEmuReg(ra, Decoder_Rn(instr)); Rd = JITRegAlloc_LockEmuReg(ra, Decoder_Rd(instr)); /* Map registers */ if (!Decoder_LDRSTR_ImmFlag(instr)) { Rm = JITRegAlloc_MapReg(ra, Decoder_Rm(instr), 1, JIT_H_REG_NONE, JIT_AL); } if (Rn == JIT_H_REG_NONE) { Rn = JITRegAlloc_MapReg(ra, Decoder_Rn(instr), 1, JIT_H_REG_NONE, JIT_AL); } if (Decoder_Rn(instr) == JIT_E_PC) { /* Mask out the flags into a temp reg */ JITHostReg Rpc = JITRegAlloc_MapReg(ra, JIT_E_Temp2, 1, JIT_H_REG_NONE, JIT_NV); ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_BIC,Rpc,Rn,0xFC000003)); JITRegAlloc_LockReg(ra, Rn, -1, false); Rn = Rpc; } /* Calculate ALU output in a temporary register, so we can avoid write-back on abort */ Ralu = Rn; if (Decoder_LDRSTR_HasOffset(instr)) { Ralu = JITRegAlloc_MapReg(ra, JIT_E_Temp, 1, JIT_H_REG_NONE, JIT_NV); if (Decoder_LDRSTR_ImmFlag(instr)) { codegen_bigadd(ra->block, Ralu, Rn, (instr->instr & 0xfff) * (Decoder_LDRSTR_UpFlag(instr) ? 
1 : -1)); } else { uint32_t offset = (instr->instr & 0xfe0) | Rm; /* Preserve shift, knock out register-shifted-register flag */ ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Rm(JIT_AL, Decoder_LDRSTR_UpFlag(instr) ? JIT_ADD : JIT_SUB, Ralu, Rn, offset)); /* Rm no longer needed */ JITRegAlloc_LockReg(ra, Rm, -1, false); } } Raddr = (Decoder_LDRSTR_PreFlag(instr) ? Ralu : Rn); /* Now perform load */ if (Rd == JIT_H_REG_NONE) { Rd = JITRegAlloc_MapReg(ra, Decoder_Rd(instr), 1, JIT_H_REG_NONE, JIT_NV); } JITEmuInterf2_WriteLoad(ra, Rd, Raddr, Decoder_LDRSTR_ByteFlag(instr), block2->nextinstr); /* Write the exit handler (WriteLoad will have inserted any necessary branches) */ { JITRegAlloc ra2; CodeGenState cgstate2; memcpy(&cgstate2,cgstate,sizeof(CodeGenState)); JITRegAlloc_Copy(&ra2,ra,block2); codegen_sync(&ra2,&cgstate2); JITEmuInterf2_WriteEpilogue(&ra2,JITResult_Interpret); } /* Apply writeback */ if (Decoder_LDRSTR_WritebackFlag(instr) && (Ralu != Rn)) { ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rm(JIT_AL, JIT_MOV, Rn, Ralu)); JITRegAlloc_LockReg(ra, Rn, -1, true); JITRegAlloc_LockReg(ra, Ralu, -1, false); Rn = JIT_H_REG_NONE; Ralu = JIT_H_REG_NONE; } /* Release remaining regs */ JITRegAlloc_LockReg(ra, Rd, -1, true); if (Ralu != Rn) { JITRegAlloc_LockReg(ra, Ralu, -1, false); } if (Rn != JIT_H_REG_NONE) { JITRegAlloc_LockReg(ra, Rn, -1, false); } } static void codegen_handle_ldr_cc(JITRegAlloc *ra1, const CodeGenState *cgstate, uint32_t cc, const Instruction *instr, ForwardCodeBlock *block2) { uintptr_t branch; JITRegAlloc ra2; JITHostReg cyc; /* Assume cycle will be consumed */ cyc = JITRegAlloc_MapReg(ra1, JIT_E_CycleCount, 1, JIT_H_REG_NONE, JIT_AL); /* Fork register state */ JITRegAlloc_Fork(ra1,&ra2); /* Reserve space for branch */ branch = ForwardCodeBlock_WriteCode(ra2.block, 0); /* Generate code */ codegen_handle_ldr(&ra2,cgstate,instr,block2); /* Consume cycle */ ForwardCodeBlock_WriteCode(ra2.block, 
JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_ADD,cyc,cyc,1)); /* Join state */ JITRegAlloc_Join(ra1,&ra2); /* Fill in branch */ *((uint32_t *) branch) = JITCodeGen_Branch(cc ^ (1<<28),ra2.block->nextinstr-branch); JITRegAlloc_LockReg(ra1, cyc, -1, true); } static void codegen_handle_ldm(JITRegAlloc *ra,const CodeGenState *cgstate,const Instruction *instr, ForwardCodeBlock *block2) { JITHostReg Rn, Raddr; int regs = Decoder_LDMSTM_NumRegs(instr); int offset = 0; /* Map Rn */ Rn = JITRegAlloc_MapReg(ra, Decoder_Rn(instr), 1, JIT_H_REG_NONE, JIT_AL); /* Calculate base address */ Raddr = JITRegAlloc_MapReg(ra, JIT_E_Temp, 1, JIT_H_REG_NONE, JIT_NV); ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_BIC,Raddr,Rn,3)); if (Decoder_LDMSTM_PreFlag(instr)) { offset = 4; } if (!Decoder_LDMSTM_UpFlag(instr)) { offset = -offset - ((regs-1)<<2); } codegen_bigadd(ra->block, Raddr, Raddr, offset); /* HACK - making registers dirty in WriteLDM could break the exit handler, so write back everything here */ JITRegAlloc_WriteBackDirty(ra); /* Now perform load */ JITEmuInterf2_WriteLDM(ra, Rn, Raddr, instr, block2->nextinstr); /* Write the exit handler (WriteLDM will have inserted any necessary branches) */ { JITRegAlloc ra2; CodeGenState cgstate2; memcpy(&cgstate2,cgstate,sizeof(CodeGenState)); JITRegAlloc_Copy(&ra2,ra,block2); codegen_sync(&ra2,&cgstate2); JITEmuInterf2_WriteEpilogue(&ra2,JITResult_Interpret); } /* Apply writeback */ if (Decoder_LDMSTM_WritebackFlag(instr) && !(instr->instr & (1<<Decoder_Rn(instr)))) { offset = (Decoder_LDMSTM_UpFlag(instr) ? 
1 : -1) * (regs<<2); codegen_bigadd(ra->block, Rn, Rn, offset); JITRegAlloc_LockReg(ra, Rn, -1, true); } else { JITRegAlloc_LockReg(ra, Rn, -1, false); } JITRegAlloc_LockReg(ra, Raddr, -1, false); } static void codegen_handle_ldm_cc(JITRegAlloc *ra1, const CodeGenState *cgstate, uint32_t cc, const Instruction *instr, ForwardCodeBlock *block2) { uintptr_t branch; JITRegAlloc ra2; JITHostReg cyc; /* Assume cycles will be consumed */ cyc = JITRegAlloc_MapReg(ra1, JIT_E_CycleCount, 1, JIT_H_REG_NONE, JIT_AL); /* Fork register state */ JITRegAlloc_Fork(ra1,&ra2); /* Reserve space for branch */ branch = ForwardCodeBlock_WriteCode(ra2.block, 0); /* Generate code */ codegen_handle_ldm(&ra2,cgstate,instr,block2); /* Consume cycles */ ForwardCodeBlock_WriteCode(ra2.block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL,JIT_ADD,cyc,cyc,Decoder_LDMSTM_Cycles(instr))); /* Join state */ JITRegAlloc_Join(ra1,&ra2); /* Fill in branch */ *((uint32_t *) branch) = JITCodeGen_Branch(cc ^ (1<<28),ra2.block->nextinstr-branch); JITRegAlloc_LockReg(ra1, cyc, -1, true); } #ifndef __riscos__ static JITResult JIT_Hack(JITEmuState *state,void *addr) { return JITResult_Interpret; } #endif static void JIT_GeneratePage(JITEmuState *state,JITPage *page) { const JITState *jit = JIT_GetState(state); ForwardCodeBlock block,block2; void *addr = JITPage_StartOfPage(jit,page); int remaining = JITPAGE_SIZE; JITFunc *funcs = JIT_Phy2Func(jit,addr); CodeGenState cgstate = {false}; bool incode = false, block2_init = false; JITRegAlloc ra; #ifdef DEBUG_JIT_METRICS uint32_t entry_points = ~0, exit_points = -~0; #endif /* Release any code that currently exists */ JITPage_ForgetCode(jit,page); /* Start allocating new code */ page->codeblock = CodeBlock_NextID(); ForwardCodeBlock_New(&block, false); JITRegAlloc_Init(&ra, &block); while (remaining > 0) { remaining -= 4; if (MemAttr_GetEntryPointFlag(jit,addr)) { /* Write back state */ codegen_sync(&ra,&cgstate); JITRegAlloc_CalleeRestore(&ra,false); /* Record current 
location as an entry point */ #ifdef DEBUG_JIT_DUMP fprintf(stderr,"entry point: %08x\n",block.nextinstr); #endif #ifdef __riscos__ *funcs++ = (JITFunc) block.nextinstr; #else *funcs++ = &JIT_Hack; #endif incode = true; #ifdef DEBUG_JIT_METRICS entry_points++; #endif } else { /* Not an entry point */ *funcs++ = &JIT_Generate; } if (incode) { /* Decode instruction */ Instruction instr; Decoder_Decode(&instr,*((uint32_t *) addr)); if (!JITable(&instr)) { /* Terminate this block */ codegen_sync(&ra,&cgstate); JITEmuInterf2_WriteEpilogue(&ra,JITResult_Interpret); incode = false; #ifdef DEBUG_JIT_METRICS exit_points++; #endif } else { uint32_t temp,cc; JITHostReg Rd,Rn,Rm,Rs,Rpc; uintptr_t branch; #ifdef DEBUG_JIT_DUMP const char *str; uint32_t laddr = ((state->Reg[15]-8)&0x03fff000)+JITPAGE_SIZE-4-remaining; _swix(Debugger_Disassemble,_INR(0,1)|_OUT(1),instr.instr,laddr,&str); fprintf(stderr,"%08x %08x %s\n",laddr,instr.instr,str); #endif /* Sync if we've been going for a long time (hack to avoid impossible imm12) */ if ((cgstate.cycles > 128) || (cgstate.instrs > 128)) { codegen_sync(&ra,&cgstate); } /* Mark this as code */ MemAttr_SetCodeFlag(jit,addr); #ifdef DEBUG_JIT_METRICS jitmetrics.instructions_in++; #endif cc = Decoder_CC(&instr); /* Work out how to generate code */ switch (instr.type) { case InstrType_NOP: break; case InstrType_DataProc: if (!cgstate.psr_loaded && (Decoder_Conditional(&instr) || Decoder_DataProc_SFlag(&instr) || Decoder_DataProc_CarryIn(&instr))) { /* PSR needed but not loaded yet */ JITHostReg psrreg = JITRegAlloc_MapReg(&ra, JIT_E_PC, 0, JIT_H_REG_NONE, JIT_AL); ForwardCodeBlock_WriteCode(&block, JITCodeGen_LoadNZCV(JIT_AL,psrreg)); cgstate.psr_loaded = true; } /* Load arguments, update instruction */ temp = instr.instr & ~0xff000; /* Zap Rd, Rn fields */ Rn = Rd = Rm = Rs = Rpc = JIT_H_REG_NONE; /* Pre-lock registers */ if (Decoder_DataProc_IsShiftedReg(&instr)) { /* Add the extra cycle */ codegen_conditional_cycles(&ra, &cgstate, cc, 1); 
Rs = JITRegAlloc_LockEmuReg(&ra, Decoder_Rs(&instr)); } if (Decoder_DataProc_UsesRn(&instr)) { Rn = JITRegAlloc_LockEmuReg(&ra, Decoder_Rn(&instr)); } if (!Decoder_DataProc_ImmFlag(&instr)) { Rm = JITRegAlloc_LockEmuReg(&ra, Decoder_Rm(&instr)); } if (Decoder_DataProc_UsesRd(&instr)) { Rd = JITRegAlloc_LockEmuReg(&ra, Decoder_Rd(&instr)); } /* Now actually load them */ if (!Decoder_DataProc_ImmFlag(&instr)) { if (Rm == JIT_H_REG_NONE) { Rm = JITRegAlloc_MapReg(&ra, Decoder_Rm(&instr), 1, JIT_H_REG_NONE, JIT_AL); } if (Decoder_Rm(&instr) == JIT_E_PC) { codegen_sync_pc(&ra,&cgstate,true); } temp = (temp & ~0xf) | Rm; } if (Decoder_DataProc_UsesRn(&instr)) { if (Rn == JIT_H_REG_NONE) { Rn = JITRegAlloc_MapReg(&ra, Decoder_Rn(&instr), 1, JIT_H_REG_NONE, JIT_AL); } if (Decoder_Rn(&instr) == JIT_E_PC) { codegen_sync_pc(&ra,&cgstate,true); /* Mask out the flags into a temp reg */ Rpc = JITRegAlloc_MapReg(&ra, JIT_E_Temp, 1, JIT_H_REG_NONE, JIT_NV); ForwardCodeBlock_WriteCode(&block, JITCodeGen_DataProc_Rd_Rn_Imm(cc,JIT_BIC,Rpc,Rn,0xFC000003)); JITRegAlloc_LockReg(&ra, Rn, -1, false); Rn = Rpc; } temp |= (Rn<<16); } /* If Rn isn't used, leave as zero (preferred MOV/MVN encoding) */ if (Decoder_DataProc_IsShiftedReg(&instr)) { if (Rs == JIT_H_REG_NONE) { Rs = JITRegAlloc_MapReg(&ra, Decoder_Rs(&instr), 1, JIT_H_REG_NONE, JIT_AL); if (Decoder_Rs(&instr) == JIT_E_PC) { /* Mask out the flags into a temp reg */ Rpc = JITRegAlloc_MapReg(&ra, JIT_E_Temp2, 1, JIT_H_REG_NONE, JIT_NV); ForwardCodeBlock_WriteCode(&block, JITCodeGen_DataProc_Rd_Rn_Imm(cc,JIT_BIC,Rpc,Rs,0xFC000003)); JITRegAlloc_LockReg(&ra, Rs, -1, false); Rs = Rpc; } /* If Rn or Rm are the PC, we also need to add 4 to them */ if ((Rn != JIT_H_REG_NONE) && (Decoder_Rn(&instr) == JIT_E_PC)) { ForwardCodeBlock_WriteCode(&block, JITCodeGen_DataProc_Rd_Rn_Imm(cc,JIT_ADD,Rpc,Rn,4)); } if ((Rm != JIT_H_REG_NONE) && (Decoder_Rm(&instr) == JIT_E_PC)) { /* Yuck, this will be the full PC Transfer into another temp */ Rpc = 
JITRegAlloc_MapReg(&ra, JIT_E_Temp3, 1, JIT_H_REG_NONE, JIT_NV); ForwardCodeBlock_WriteCode(&block, JITCodeGen_DataProc_Rd_Rn_Imm(cc,JIT_ADD,Rpc,Rm,4)); JITRegAlloc_LockReg(&ra, Rm, -1, false); Rm = Rpc; temp = (temp & ~0xf) | Rm; } } temp = (temp & ~0xf00) | (Rs<<8); } /* Map Rd last so we can conditionally load it based on CC */ if (Decoder_DataProc_UsesRd(&instr)) { if (Rd == JIT_H_REG_NONE) { Rd = JITRegAlloc_MapReg(&ra, Decoder_Rd(&instr), 1, JIT_H_REG_NONE, cc ^ (1<<28)); } temp |= (Rd<<12); } /* If Rd isn't used leave as zero (preferred CMP, etc. encoding) N.B. this will be wrong when we start supporting CMPP (need to decode them as a different InstrType?) */ /* Perform the op */ ForwardCodeBlock_WriteCode(&block, temp); /* Write back any result */ if (Decoder_DataProc_SFlag(&instr)) { cgstate.psr_dirty = true; } JITRegAlloc_LockReg(&ra, Rn, -1, false); JITRegAlloc_LockReg(&ra, Rd, -1, true); JITRegAlloc_LockReg(&ra, Rm, -1, false); JITRegAlloc_LockReg(&ra, Rs, -1, false); break; case InstrType_Branch: if (!cgstate.psr_loaded && Decoder_Conditional(&instr)) { /* PSR needed but not loaded yet */ JITHostReg psrreg = JITRegAlloc_MapReg(&ra, JIT_E_PC, 0, JIT_H_REG_NONE, JIT_AL); ForwardCodeBlock_WriteCode(&block, JITCodeGen_LoadNZCV(JIT_AL,psrreg)); cgstate.psr_loaded = true; } #ifdef DEBUG_JIT_METRICS exit_points++; #endif /* Pre-increment these? 
*/ cgstate.instrs++; cgstate.cycles++; if (cc != JIT_AL) { /* Branch to an exit on CC */ if (!block2_init) { block2_init = true; ForwardCodeBlock_New(&block2, false); } ForwardCodeBlock_WriteCode(&block, JITCodeGen_Branch(cc,block2.nextinstr-block.nextinstr)); codegen_handle_branch_cc(&ra,&cgstate,&block2,&instr); } else { codegen_handle_branch(&ra,&cgstate,&instr); incode = false; /* Don't assume it will return */ } goto next_instr; break; case InstrType_LDRSTR: if (!cgstate.psr_loaded && (Decoder_Conditional(&instr) || Decoder_LDRSTR_CarryIn(&instr))) { /* PSR needed but not loaded yet */ JITHostReg psrreg = JITRegAlloc_MapReg(&ra, JIT_E_PC, 0, JIT_H_REG_NONE, JIT_AL); ForwardCodeBlock_WriteCode(&block, JITCodeGen_LoadNZCV(JIT_AL,psrreg)); cgstate.psr_loaded = true; } if (cgstate.psr_dirty /* Force PSR writeback (fastmap will clobber it) */ || (Decoder_Rn(&instr) == JIT_E_PC) /* Force PC write-back if needed as input */ || (!Decoder_LDRSTR_ImmFlag(&instr) && (Decoder_Rm(&instr) == JIT_E_PC))) { codegen_sync_pc(&ra,&cgstate,true); } cgstate.psr_loaded = false; /* About to be clobbered */ if (!block2_init) { block2_init = true; ForwardCodeBlock_New(&block2, false); } if (cc != JIT_AL) { codegen_handle_ldr_cc(&ra,&cgstate,cc,&instr,&block2); } else { codegen_handle_ldr(&ra,&cgstate,&instr,&block2); cgstate.cycles++; } break; case InstrType_LDMSTM: if (!cgstate.psr_loaded && Decoder_Conditional(&instr)) { /* PSR needed but not loaded yet */ JITHostReg psrreg = JITRegAlloc_MapReg(&ra, JIT_E_PC, 0, JIT_H_REG_NONE, JIT_AL); ForwardCodeBlock_WriteCode(&block, JITCodeGen_LoadNZCV(JIT_AL,psrreg)); cgstate.psr_loaded = true; } if (cgstate.psr_dirty) { /* Force PSR writeback (fastmap will clobber it) */ codegen_sync_pc(&ra,&cgstate,true); } cgstate.psr_loaded = false; /* About to be clobbered */ if (!block2_init) { block2_init = true; ForwardCodeBlock_New(&block2, false); } if (cc != JIT_AL) { codegen_handle_ldm_cc(&ra,&cgstate,cc,&instr,&block2); } else { 
codegen_handle_ldm(&ra,&cgstate,&instr,&block2); cgstate.cycles+=Decoder_LDMSTM_Cycles(&instr); } break; default: assert(0); break; } cgstate.instrs++; cgstate.cycles++; next_instr:; #ifdef DEBUG_JIT_SINGLE_INSTR if (incode) { codegen_sync(&ra,&cgstate); JITEmuInterf2_WriteEpilogue(&ra,JITResult_Normal); incode = false; #ifdef DEBUG_JIT_METRICS exit_points++; #endif } #endif } } addr = (void *) (((uintptr_t) addr)+4); } /* If we're still in code at the end, terminate the block with Normal result */ if (incode) { codegen_sync(&ra,&cgstate); JITEmuInterf2_WriteEpilogue(&ra,JITResult_Normal); #ifdef DEBUG_JIT_METRICS exit_points++; #endif } /* Finish generating code */ CodeBlock_ClaimID(page->codeblock, page); DirtyRanges_Flush(); #ifdef JIT_DEBUG fprintf(stderr,"JIT done\n"); #endif #ifdef DEBUG_JIT_METRICS if (entry_points < JITPAGE_SIZE/4) jitmetrics.entry_points[entry_points]++; if (exit_points < JITPAGE_SIZE/4) jitmetrics.exit_points[exit_points]++; #endif } #ifdef DEBUG_JIT_TEST_EXEC #include "../armemu.h" extern void extern_execute_instruction(ARMul_State *state,ARMword instr,ARMword r15); extern JITResult test_exec(JITEmuState *state,ARMword *addr); JITResult test_exec(JITEmuState *state,ARMword *addr) { /* Make a backup of the state */ const JITState *jit = JIT_GetState(state); #ifndef DEBUG_JIT_FAKE ARMword r[16],r2[16]; CycleCount cyc = state->NumCycles; #endif JITResult res; JITPage *page = JITPage_Get(jit,addr); ARMword *page_end = (ARMword *) JITPage_StartOfPage(jit,page); ARMword *initial = addr; JITFunc *func = JIT_Phy2Func(jit,addr); page_end += JITPAGE_SIZE/4; #ifndef DEBUG_JIT_FAKE memcpy(r,state->Reg,sizeof(r)); #endif /* Single-step over all code */ while ((addr != page_end) && MemAttr_GetCodeFlag(jit,addr)) { ARMword oldr15 = state->Reg[15]; /* Stop here for any taken load instruction */ { Instruction instr; Decoder_Decode(&instr, *addr); if ((instr.type == InstrType_LDRSTR) && ARMul_CCCheck(instr.instr,(oldr15 & CCBITS))) { break; } } 
state->NextInstr = NORMAL; state->NumCycles++; extern_execute_instruction(state,*addr,state->Reg[15]); if (state->NextInstr == NORMAL) { if ((oldr15 & 0xfffffff) != (state->Reg[15] & 0xfffffff)) { fprintf(stderr,"Unexpected PC modification %08x -> %08x @ %08x\n",oldr15,state->Reg[15],*addr); } state->Reg[15]+=4; } else if (state->NextInstr != PCINCED) { fprintf(stderr,"Unexpected pipeline state %d @ %08x\n",state->NextInstr,*addr); } addr++; #ifdef DEBUG_JIT_SINGLE_INSTR break; #endif } #ifndef DEBUG_JIT_FAKE /* Copy state again */ memcpy(r2,state->Reg,sizeof(r)); /* Restore original state */ memcpy(state->Reg,r,sizeof(r)); state->NumCycles = cyc; /* Run the code */ res = (*func)(state,initial); /* Check for consistency */ if (memcmp(state->Reg,r2,sizeof(r))) { int i; fprintf(stderr,"JIT inconsistency!\n"); fprintf(stderr,"Orig:"); for(i=0;i<16;i++) { fprintf(stderr," %08x",r[i]); } fprintf(stderr,"\nStep:"); for(i=0;i<16;i++) { fprintf(stderr," %08x",r2[i]); } fprintf(stderr,"\nJIT :"); for(i=0;i<16;i++) { fprintf(stderr," %08x",state->Reg[i]); } fprintf(stderr,"\nCode:\n"); for(i=0;initial+i != (addr+1);i++) { char *str; _swix(Debugger_Disassemble,_INR(0,1)|_OUT(1),initial[i],(r[15]&0x03fffffc)-8+(i*4),&str); fprintf(stderr,"%08x %08x %s\n",(r[15]&0x03fffffc)-8+(i*4),initial[i],str); } #if 0 /* Broken, MOV PC, R14 is no longer the only terminating instruction (plus there could be branches) */ fprintf(stderr,"\nJIT:\n"); addr = (ARMword *) *func; do { char *str; _swix(Debugger_Disassemble,_INR(0,1)|_OUT(1),*addr,addr,&str); fprintf(stderr,"%08x %08x %s\n",addr,*addr,str); } while (*(addr++) != 0xe1a0f00e); #else fprintf(stderr,"\nJIT: %08x\n",*func); #endif assert(0); } return res; #else state->NextInstr = (addr==page_end ? PCINCED : NORMAL); return (addr==page_end ? 
JITResult_Normal : JITResult_Interpret); #endif } #endif JITResult JIT_Generate(JITEmuState *state,void *addr) { const JITState *jit = JIT_GetState(state); Instruction instr; uint32_t *input = (uint32_t *) addr; Decoder_Decode(&instr,*input); if (!JITable(&instr)) { return JITResult_Interpret; } #ifdef DEBUG_JIT_METRICS jitmetrics.generate_count++; #endif /* Flag this location as being an entry point */ MemAttr_SetEntryPointFlag(jit,addr); /* Generate all the code for this page */ JIT_GeneratePage(state,JITPage_Get(jit,addr)); /* Re-fetch and execute the JIT function (mustn't be JIT_Generate!) */ #ifdef DEBUG_JIT_TEST_EXEC return test_exec(state,(ARMword *) addr); #else return (*JIT_Phy2Func(jit,addr))(state,addr); #endif } --- NEW FILE: jitpage.h --- #ifndef JITPAGE_HEADER #define JITPAGE_HEADER #include "jitstate.h" #define JITPAGE_SIZE 4096 struct JITPage { int codeblock; /* ID of first code block in chain, -1 if no code exists for this page */ /* ??? do we need anything here? */ }; extern void JITPage_Init(JITState *jit,uint32_t romramchunksize); /* Get the start address of this page */ static inline void *JITPage_StartOfPage(const JITState *jit,JITPage *page) { int idx = page - jit->pages; return (void *) (jit->romramchunk + idx*JITPAGE_SIZE); } /* Forget any generated code for this page (but retain memory attributes) */ extern void JITPage_ForgetCode(const JITState *jit,JITPage *page); /* Get the page that contains this address */ static inline JITPage *JITPage_Get(const JITState *jit,void *addr) { uint32_t idx = (((uintptr_t) addr) - jit->romramchunk)/JITPAGE_SIZE; return &jit->pages[idx]; } /* Called when code is overwritten by data: Forget generated code, reset memory attributes */ extern void JITPage_ClobberCode(const JITState *jit,JITPage *page); /* Clobber code by address */ extern void JITPage_ClobberCodeByAddr(const JITState *jit,void *addr); #endif --- NEW FILE: jitpage.c --- #include <stdlib.h> #include "jitpage.h" #include "jit.h" #include 
"codeblocks.h" #include "memattr.h" void JITPage_Init(JITState *jit,uint32_t romramchunksize) { int numpages = romramchunksize/JITPAGE_SIZE; jit->pages = (JITPage *) calloc(numpages,sizeof(JITPage)); while (numpages--) { jit->pages[numpages].codeblock = -1; } } void JITPage_ForgetCode(const JITState *jit,JITPage *page) { if (page->codeblock == -1) { return; } /* Unlink with the generated code */ CodeBlock_InvalidateID(page->codeblock); page->codeblock = -1; /* Reset all the pointers */ void *addr = JITPage_StartOfPage(jit,page); JITFunc *func = JIT_Phy2Func(jit, addr); int i; for(i=0;i<JITPAGE_SIZE;i+=4) { *func++ = &JIT_Generate; } } void JITPage_ClobberCode(const JITState *jit,JITPage *page) { /* Release code */ JITPage_ForgetCode(jit,page); /* Reset memory attributes */ void *phy = JITPage_StartOfPage(jit,page); MemAttr_ClearFlagsRange(jit, phy, ((uint8_t *) phy) + JITPAGE_SIZE); } void JITPage_ClobberCodeByAddr(const JITState *jit,void *addr) { JITPage_ClobberCode(jit,JITPage_Get(jit,addr)); } --- NEW FILE: jit.h --- /* Manually include jitstate2.h first (necessary for everything to be defined in the right order with ArcEm) */ #include "jitstate2.h" #ifndef JIT_HEADER #define JIT_HEADER #include "memattr.h" #include "jitpage.h" /* Initialise the JIT * romramchunk must be a pointer to the memory block containing all ROM/RAM/VRAM etc. Each sub-section must start at a 4K offset into the block * romramchunksize must be the size of romramchunk (4K multiple) */ extern void JIT_Init(JITState *jit,uintptr_t romramchunk,uint32_t romramchunksize); /* Try to generate (and execute) code for the given address */ extern JITResult JIT_Generate(JITEmuState *state,void *addr); /* Return location of JIT function associated with addr addr must be word aligned! 
*/ static inline JITFunc *JIT_Phy2Func(const JITState *jit,void *addr) { return (JITFunc *) (((uintptr_t)addr) + jit->addr2func); } /* Clobber any generated code at the given address */ static inline void JIT_ClobberCode(JITEmuState *state,void *addr) { JITState *jit = JIT_GetState(state); if (!MemAttr_GetCodeFlag(jit,addr)) { return; } JITPage_ClobberCodeByAddr(jit,addr); } #endif --- NEW FILE: metrics.h --- #ifndef JITMETRICS_HEADER #define JITMETRICS_HEADER #ifdef DEBUG_JIT_METRICS #include "jitpage.h" #include "decoder.h" typedef enum { /* 0 ... InstrType_Count represent specific instructions that caused termination */ TerminateReason_Normal = InstrType_Count, /* Termination due to JITResult_Normal */ TerminateReason_Special, /* Execution has entered special memory region (or prefetch abort) */ TerminateReason_Count, } TerminateReason; typedef struct { /* generate_count gives the number of blocks that have been generated */ uint32_t generate_count; /* instructions_in counts the number of instructions that have been JITed */ uint32_t instructions_in; /* instructions_out counts the number of instructions output by the JIT */ uint32_t instructions_out; /* interpret_count counts the number of instructions that have been interpreted */ uint32_t interpret_count; /* terminate_reason[N] gives the number of times that a code block has exited due to reason N */ uint32_t terminate_reason[TerminateReason_Count]; #ifdef DEBUG_JIT_METRICS_EXEC /* execute_histogram[N] gives the number of times that a block of length N-1 instructions has been executed */ uint32_t execute_histogram[JITPAGE_SIZE/4]; #endif /* entry_points[N] gives the number of times that a page has been generated with N-1 entry points */ uint32_t entry_points[JITPAGE_SIZE/4]; /* exit_points[N] gives the number of times that a page has been generated with N-1 exit points */ uint32_t exit_points[JITPAGE_SIZE/4]; } JITMetrics; extern JITMetrics jitmetrics; extern void JITMetrics_Dump(void); #endif /* 
DEBUG_JIT_METRICS */ #endif --- NEW FILE: jitstate2.h --- /* Manually include armdefs.h first (necessary for everything to be defined in the right order with ArcEm) */ #include "../armdefs.h" #ifndef JITSTATE2_HEADER #define JITSTATE2_HEADER /* Accessor function to get JITState from JITEmuState */ static inline JITState *JIT_GetState(JITEmuState *state) { return &state->jit; } #endif --- NEW FILE: metrics.c --- #ifdef DEBUG_JIT_METRICS #include <stdio.h> #include <string.h> #include <inttypes.h> #include "metrics.h" JITMetrics jitmetrics; static const char *terminate_reasons[] = { "TerminateReason_NOP", "TerminateReason_Branch", "TerminateReason_DataProc", "TerminateReason_Multiply", "TerminateReason_LDRSTR", "TerminateReason_LDMSTM", "TerminateReason_SWI", "TerminateReason_OtherInstr", "TerminateReason_Normal", "TerminateReason_Special", }; void JITMetrics_Dump(void) { uint64_t execute_total = 0; int i; fprintf(stderr,"%12u generate_count\n",jitmetrics.generate_count); fprintf(stderr,"%12u instructions_in\n",jitmetrics.instructions_in); fprintf(stderr,"%12u instructions_out\n",jitmetrics.instructions_out); #ifdef DEBUG_JIT_METRICS_EXEC for(i=0;i<JITPAGE_SIZE/4;i++) { if (jitmetrics.execute_histogram[i]) { fprintf(stderr,"%12u %4d execute_histogram\n",jitmetrics.execute_histogram[i],i+1); execute_total += ((uint64_t) jitmetrics.execute_histogram[i])*(i+1); } } fprintf(stderr,"%12" PRIu64 " execute_total\n",execute_total); #endif fprintf(stderr,"%12u interpret_count\n",jitmetrics.interpret_count); for(i=0;i<TerminateReason_Count;i++) { fprintf(stderr,"%12u %s\n",jitmetrics.terminate_reason[i],terminate_reasons[i]); } for(i=0;i<JITPAGE_SIZE/4;i++) { if (jitmetrics.entry_points[i]) { fprintf(stderr,"%12u %4d entry_points\n",jitmetrics.entry_points[i],i+1); } } for(i=0;i<JITPAGE_SIZE/4;i++) { if (jitmetrics.exit_points[i]) { fprintf(stderr,"%12u %4d exit_points\n",jitmetrics.exit_points[i],i+1); } } memset(&jitmetrics,0,sizeof(jitmetrics)); } #endif --- NEW FILE: 
decoder.c --- #include "decoder.h" void Decoder_Decode(Instruction *out, uint32_t instr) { out->instr = instr; if (instr >= 0xF0000000) { /* NV */ out->type = InstrType_NOP; return; } switch ((instr >> 24) & 0xf) { case 0x0: if ((instr & 0x0fc000f0) == 0x90) { out->type = InstrType_Multiply; break; } case 0x1: case 0x2: case 0x3: if (Decoder_DataProc_IsCompare(out) && !Decoder_DataProc_SFlag(out)) { /* Detect MRS, MSR, etc. at this decode phase */ /* XXX cycle counting will be wrong here? */ out->type = InstrType_NOP; break; } /* Non-immediate with bit 7 & 4 set isn't valid */ if (!Decoder_DataProc_ImmFlag(out) && ((instr & 0x90) == 0x90)) { out->type = InstrType_Other; break; } out->type = InstrType_DataProc; break; case 0x4: case 0x5: case 0x6: case 0x7: out->type = InstrType_LDRSTR; break; case 0x8: case 0x9: out->type = InstrType_LDMSTM; break; case 0xa: case 0xb: out->type = InstrType_Branch; break; case 0xc: /* LDC/STC */ case 0xd: /* LDC/STC */ case 0xe: /* CDP */ out->type = InstrType_Other; case 0xf: out->type = InstrType_SWI; break; /* TODO: SWP */ } } int Decoder_LDMSTM_NumRegs(const Instruction *instr) { int i,regs = 0; for(i=0;i<16;i++) { if (instr->instr & (1<<i)) { regs++; } } return regs; } int Decoder_LDMSTM_Cycles(const Instruction *instr) { /* XXX fix this to be correct for all cases */ return Decoder_LDMSTM_NumRegs(instr) + 2; } --- NEW FILE: emuinterf.c --- #include "emuinterf.h" #include "../arch/armarc.h" #include "emuinterf2.h" #include "decoder.h" #include "codeblocks.h" JITEmuState *JITEmuInterf_GetState(void) { return &statestr; } void JITEmuInterf2_WriteEpilogue(JITRegAlloc *ra, JITResult result) { #ifdef DEBUG_JIT_FORCE_NORMAL result = JITResult_Normal; #endif JITHostReg tempreg = JITRegAlloc_MapReg(ra, JIT_E_Temp, 1, JIT_H_REG_NONE, JIT_NV); /* Set correct pipeline state The interpreter assumes NORMAL when executing instructions, so if we're returning with JITResult_Interpret then we must be NORMAL Otherwise, set to PCINCED since we 
have fully advanced the PC */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Imm(JIT_AL,JIT_MOV,tempreg,(result==JITResult_Normal?PCINCED:NORMAL)));
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_StoreImm(JIT_AL,tempreg,ra->emu_regs[JIT_E_StateReg].hostreg,offsetof(ARMul_State,NextInstr)));
  /* XXX Aborted, AbortSig? */
  /* Return */
  JITRegAlloc_LockReg(ra,tempreg,-1,false);
  JITRegAlloc_WriteBackDirty(ra);
  /* XXX hack, using R0 without claiming */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Imm(JIT_AL,JIT_MOV,0,result));
  JITRegAlloc_CalleeRestore(ra, true);
}

/* Emit host code that loads a byte or a word from the emulated address held
   in Raddr into Rd, going through the FastMap stored in ARMul_State.

   ra    - register allocator; code is appended to ra->block
   Rd    - host register that receives the loaded value
   Raddr - host register holding the address to load from
   byte  - true emits a byte load (bit 22 set in the load instruction),
           false emits a word load followed by conditional rotates
   abort - target of the emitted bail-out branches; the branch offset is
           computed as abort minus ra->block->nextinstr at the time each
           branch is written

   The emitted code branches to abort when the address is >= 0x04000000
   (CS after the CMP) or when the FastMap test leaves Z or N set (LE).
   Three temporaries (JIT_E_Temp2..4) are mapped for the duration of the
   sequence and released before returning. The order in which the
   instructions are written matters because later ones consume the flags
   set by earlier ones. */
void JITEmuInterf2_WriteLoad(JITRegAlloc *ra, JITHostReg Rd, JITHostReg Raddr, bool byte, uintptr_t abort)
{
  JITHostReg fastmap, fastmapmode, temp;
  /* Check for address exceptions */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rn_Imm(JIT_AL, JIT_CMP, Raddr, 0x04000000)); /* CMP to clear V */
  /* Load the fastmap regs */
  fastmap = JITRegAlloc_MapReg(ra, JIT_E_Temp2, 1, JIT_H_REG_NONE, JIT_NV);
  fastmapmode = JITRegAlloc_MapReg(ra, JIT_E_Temp3, 1, JIT_H_REG_NONE, JIT_NV);
  temp = JITRegAlloc_MapReg(ra, JIT_E_Temp4, 1, JIT_H_REG_NONE, JIT_NV);
  /* Now that regs are allocated, we can safely branch away */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_LoadImm(JIT_AL,fastmap,ra->emu_regs[JIT_E_StateReg].hostreg,offsetof(ARMul_State,FastMap)));
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_LoadImm(JIT_AL,fastmapmode,ra->emu_regs[JIT_E_StateReg].hostreg,offsetof(ARMul_State,FastMapMode)));
  /* Trigger address exception */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_Branch(JIT_CS, abort-ra->block->nextinstr));
  /* Load FlagsAndData */
  /* temp = Raddr with bits 8-11 cleared; used below as a scaled index into fastmap */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL, JIT_BIC, temp, Raddr, 0xf00));
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_Load_Rm(JIT_AL,temp,fastmap,temp | 0x4a0)); /* LSR #9 */
  /* Check type & flags */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rn_Rm(JIT_AL, JIT_TST, temp, fastmapmode));
  /* Reduced decoding: normally if all of bits 24-30 are clear it signifies an abort, with bit 31 signifying whether an access function is required
     But we only deal with the direct access case, so if N is set or 24-30 are clear we need to bail
     V=0 from CMP, so we can use LE condition code to detect failure */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_Branch(JIT_LE, abort-ra->block->nextinstr));
  /* Now we can load the data! */
  if (byte) {
    /* Byte load: address is Raddr plus temp shifted left by 8 */
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_Load_Rm(JIT_AL | (1<<22),Rd,Raddr,temp | 0x400)); /* LSL #8 */
  } else {
    /* TODO do rotated load if host supports it */
    /* fastmapmode and fastmap are reused as scratch from here on:
       fastmapmode = Raddr with the low two bits cleared,
       fastmap = Raddr LSL #31 with flags set, which drives the CS / MI
       conditional rotates after the load */
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL, JIT_BIC, fastmapmode, Raddr, 3));
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rm(JIT_AL, JIT_MOV | JIT_S, fastmap, Raddr | 0xf80)); /* LSL #31 */
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_Load_Rm(JIT_AL,Rd,fastmapmode,temp | 0x400)); /* LSL #8 */
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rm(JIT_CS, JIT_MOV, Rd, Rd | 0x860)); /* ROR #16 */
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rm(JIT_MI, JIT_MOV, Rd, Rd | 0x460)); /* ROR #8 */
  }
  /* Release regs */
  JITRegAlloc_LockReg(ra, fastmap, -1, false);
  JITRegAlloc_LockReg(ra, fastmapmode, -1, false);
  JITRegAlloc_LockReg(ra, temp, -1, false);
}

/* Emit host code that performs the loads of an LDM through the FastMap.

   ra    - register allocator; code is appended to ra->block
   Rn    - not referenced anywhere in this function body
   Raddr - host register holding the start address; the emitted code
           overwrites it (it is turned into the actual address and then
           post-incremented by 4 for every register loaded)
   instr - decoded instruction; bits 0-15 of instr->instr select which
           registers are loaded
   abort - target of the emitted bail-out branches (offset computed as
           abort minus ra->block->nextinstr when each branch is written)

   When Decoder_LDMSTM_NumRegs reports more than one register, an ADDS on
   the low 12 address bits (moved to the top of temp by LSL #20) sets carry
   if adding (count-1)*4 overflows them; the following CMP is then made
   conditional on CC so that the carry survives to the CS branch.
   Each loaded value is stored back to the emulator state immediately via
   JITEmuInterf2_StoreReg. */
void JITEmuInterf2_WriteLDM(JITRegAlloc *ra, JITHostReg Rn, JITHostReg Raddr, const Instruction *instr, uintptr_t abort)
{
  JITHostReg fastmap, fastmapmode, temp;
  int i = Decoder_LDMSTM_NumRegs(instr);
  /* Load the fastmap regs */
  fastmap = JITRegAlloc_MapReg(ra, JIT_E_Temp2, 1, JIT_H_REG_NONE, JIT_NV);
  fastmapmode = JITRegAlloc_MapReg(ra, JIT_E_Temp3, 1, JIT_H_REG_NONE, JIT_NV);
  temp = JITRegAlloc_MapReg(ra, JIT_E_Temp4, 1, JIT_H_REG_NONE, JIT_NV);
  /* Check for crossing page boundaries */
  if (i > 1) {
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rm(JIT_AL, JIT_MOV, temp, Raddr | 0xa00)); /* LSL #20 */
    ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL, JIT_ADD | JIT_S, temp, temp, (i-1)<<(2+20)));
  }
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_LoadImm(JIT_AL,fastmap,ra->emu_regs[JIT_E_StateReg].hostreg,offsetof(ARMul_State,FastMap)));
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_LoadImm(JIT_AL,fastmapmode,ra->emu_regs[JIT_E_StateReg].hostreg,offsetof(ARMul_State,FastMapMode)));
  /* Check for address exceptions */
  /* With more than one register the CMP only runs if the ADDS above left carry clear */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rn_Imm((i > 1 ? JIT_CC : JIT_AL), JIT_CMP, Raddr, 0x04000000)); /* CMP to clear V */
  /* Trigger address exception */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_Branch(JIT_CS, abort-ra->block->nextinstr));
  /* Load FlagsAndData */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Imm(JIT_AL, JIT_BIC, temp, Raddr, 0xf00));
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_Load_Rm(JIT_AL,temp,fastmap,temp | 0x4a0)); /* LSR #9 */
  /* Check type & flags */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rn_Rm(JIT_AL, JIT_TST, temp, fastmapmode));
  /* Reduced decoding: normally if all of bits 24-30 are clear it signifies an abort, with bit 31 signifying whether an access function is required
     But we only deal with the direct access case, so if N is set or 24-30 are clear we need to bail
     V=0 from CMP, so we can use LE condition code to detect failure */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_Branch(JIT_LE, abort-ra->block->nextinstr));
  /* Update Raddr to be the actual address */
  ForwardCodeBlock_WriteCode(ra->block, JITCodeGen_DataProc_Rd_Rn_Rm(JIT_AL, JIT_ADD, Raddr, Raddr, temp | 0x400)); /* LSL #8 */
  /* Load into the register if it's resident, otherwise go via temp */
  /* i is reused here as the emulated register number (0-15) */
  for(i=0;i<16;i++) {
    if (!(instr->instr & (1<<i))) {
      continue;
    }
    JITHostReg dest = ra->emu_regs[i].hostreg;
    if (dest == JIT_H_REG_NONE) {
      dest = temp;
    }
    ForwardCodeBlock_WriteCode(ra->block, 0xe4900004 | (dest << 12) | (Raddr << 16)); /* LDR dest, [Raddr], #4 */
    /* Immediately write back the value (yuck) */
    ForwardCodeBlock_WriteCode(ra->block, JITEmuInterf2_StoreReg(JIT_AL, dest, ra->emu_regs[JIT_E_StateReg].hostreg, i));
  }
  /* Release regs */
  JITRegAlloc_LockReg(ra, fastmap, -1, false);
  JITRegAlloc_LockReg(ra, fastmapmode, -1, false);
  JITRegAlloc_LockReg(ra, temp, -1, false);
}

--- NEW FILE: decoder.h ---
#ifndef DECODER_HEADER
#define DECODER_HEADER

#include "emuinterf.h"

/* ARMv2 instruction decoder */

/* Condition codes, already shifted into bits 28-31 of an instruction word */
#define JIT_EQ (0u<<28)
#define JIT_NE (1u<<28)
#define JIT_CS (2u<<28)
#define JIT_CC (3u<<28)
#define JIT_MI (4u<<28)
#define JIT_PL (5u<<28)
#define JIT_VS (6u<<28)
#define JIT_VC (7u<<28)
#define JIT_HI (8u<<28)
#define JIT_LS (9u<<28)
#define JIT_GE (10u<<28)
#define JIT_LT (11u<<28)
#define JIT_GT (12u<<28)
#define JIT_LE (13u<<28)
#define JIT_AL (14u<<28)
#define JIT_NV (15u<<28)

/* Data processing opcodes, already in position: each value is an even
   number shifted by 20, i.e. a 4-bit opcode in bits 21-24 (the same field
   Decoder_DataProc_Op extracts) */
#define JIT_AND (0u<<20)
#define JIT_EOR (2u<<20)
#define JIT_SUB (4u<<20)
#define JIT_RSB (6u<<20)
#define JIT_ADD (8u<<20)
#define JIT_ADC (10u<<20)
#define JIT_SBC (12u<<20)
#define JIT_RSC (14u<<20)
#define JIT_TST (16u<<20)
#define JIT_TEQ (18u<<20)
#define JIT_CMP (20u<<20)
#define JIT_CMN (22u<<20)
#define JIT_ORR (24u<<20)
#define JIT_MOV (26u<<20)
#define JIT_BIC (28u<<20)
#define JIT_MVN (30u<<20)

/* Bit 20, OR-ed into an opcode above (same bit that Decoder_DataProc_SFlag tests) */
#define JIT_S (1<<20)

/* Instruction classes; InstrType_Count is the last enumerator and so equals
   the number of classes listed before it */
typedef enum {
  InstrType_NOP,
  InstrType_Branch,
  InstrType_DataProc,
  InstrType_Multiply,
  InstrType_LDRSTR,
  InstrType_LDMSTM,
  InstrType_SWI,
  InstrType_Other,
  InstrType_Count,
} InstrType;

/* A decoded instruction: its class plus the raw 32-bit instruction word
   that all the Decoder_* accessors below pick fields from */
typedef struct {
  InstrType type;
  uint32_t instr;
} Instruction;

extern void Decoder_Decode(Instruction *out, uint32_t instr);

/* Condition field (bits 28-31), left in place so it compares against JIT_EQ..JIT_NV */
static inline uint32_t Decoder_CC(const Instruction *instr)
{
  return instr->instr & 0xF0000000;
}

/* Non-zero if the instruction word is numerically below JIT_AL, i.e. its
   condition field is one of EQ..LE */
static inline uint32_t Decoder_Conditional(const Instruction *instr)
{
  return instr->instr < JIT_AL;
}

/* !=0 if bit 24 is set */
static inline uint32_t Decoder_Branch_BLFlag(const Instruction *instr)
{
  return instr->instr & (1<<24);
}

/* Low 24 bits of the instruction sign-extended and multiplied by 4, plus 8.
   Note: relies on right-shifting a negative int32_t being an arithmetic
   shift, which C leaves implementation-defined. */
static inline int32_t Decoder_Branch_Offset(const Instruction *instr)
{
  return (((int32_t) (instr->instr << 8)) >> 6) + 8;
}

/* =0 if op2 is a (shifted) register */
static inline uint32_t Decoder_DataProc_ImmFlag(const Instruction *instr)
{
  return instr->instr & (1<<25);
}

/* !=0 if bit 20 is set */
static inline uint32_t Decoder_DataProc_SFlag(const Instruction *instr)
{
  return instr->instr & (1<<20);
}

/* true if op2 is a register (ImmFlag clear) with bit 4 set and bit 7 clear;
   presumably the shift-amount-in-register form, see the Decoder_Rs validity note */
static inline bool Decoder_DataProc_IsShiftedReg(const Instruction *instr)
{
  return !Decoder_DataProc_ImmFlag(instr) && ((instr->instr & 0x90) == 0x10);
}

/* Opcode field (bits 21-24), left in place so it compares against JIT_AND..JIT_MVN */
static inline uint32_t Decoder_DataProc_Op(const Instruction *instr)
{
  return instr->instr & (15<<21);
}

/* true if opcode bits 24:23 are 10, which matches JIT_TST, JIT_TEQ, JIT_CMP and JIT_CMN */
static inline bool Decoder_DataProc_IsCompare(const Instruction *instr)
{
  return (Decoder_DataProc_Op(instr)>>23) == 2;
}

/* true unless the opcode is MOV or MVN */
static inline bool Decoder_DataProc_UsesRn(const Instruction *instr)
{
  /* MOV, MVN don't use Rn */
  uint32_t op = Decoder_DataProc_Op(instr);
  return (op != JIT_MOV) && (op != JIT_MVN);
}

/* Non-compare instructions always use Rd
   Compare instructions which have Rd==15 use Rd (P suffix) */
static inline bool Decoder_DataProc_UsesRd(const Instruction *instr)
{
  return (((instr->instr>>12)&15) == 15) || !Decoder_DataProc_IsCompare(instr);
}

/* Returns true if the C flag is used as an input of the ALU */
static inline bool Decoder_DataProc_CarryIn(const Instruction *instr)
{
  /* ADC, SBC, RSC use carry */
  uint32_t op = Decoder_DataProc_Op(instr);
  if ((op == JIT_ADC) || (op == JIT_SBC) || (op == JIT_RSC)) {
    return true;
  }
  /* RRX uses carry */
  if (Decoder_DataProc_ImmFlag(instr)) {
    return false;
  }
  /* Bits 4-11 equal to 0x06: the encoding this code treats as RRX */
  return ((instr->instr & 0xff0) == 0x060);
}

/* =0 if MUL, else MLA */
static inline uint32_t Decoder_Multiply_AccumFlag(const Instruction *instr)
{
  return instr->instr & (1<<21);
}

/* !=0 if bit 20 is set */
static inline uint32_t Decoder_Multiply_SFlag(const Instruction *instr)
{
  return instr->instr & (1<<20);
}

/* =0 if store */
static inline uint32_t Decoder_LDRSTR_LoadFlag(const Instruction *instr)
{
  return instr->instr & (1<<20);
}

/* !=0 if pre-indexed */
static inline uint32_t Decoder_LDRSTR_PreFlag(const Instruction *instr)
{
  return instr->instr & (1<<24);
}
/* true if writeback: either the instruction is not pre-indexed, or bit 21 is set */
static inline bool Decoder_LDRSTR_WritebackFlag(const Instruction *instr)
{
  return !Decoder_LDRSTR_PreFlag(instr) || (instr->instr & (1<<21));
}

/* true if T flag: bit 21 set on an instruction that is not pre-indexed */
static inline bool Decoder_LDRSTR_TFlag(const Instruction *instr)
{
  return !Decoder_LDRSTR_PreFlag(instr) && (instr->instr & (1<<21));
}

/* !=0 if byte */
static inline uint32_t Decoder_LDRSTR_ByteFlag(const Instruction *instr)
{
  return instr->instr & (1<<22);
}

/* !=0 if up */
static inline uint32_t Decoder_LDRSTR_UpFlag(const Instruction *instr)
{
  return instr->instr & (1<<23);
}

/* =0 if op2 is a (shifted) register
   Note the inversion relative to the raw bit: this returns true when
   bit 25 is clear */
static inline bool Decoder_LDRSTR_ImmFlag(const Instruction *instr)
{
  return !(instr->instr & (1<<25));
}

/* true if offset is (probably) non-zero: any register offset counts, an
   immediate offset counts only if its low 12 bits are non-zero */
static inline bool Decoder_LDRSTR_HasOffset(const Instruction *instr)
{
  return !Decoder_LDRSTR_ImmFlag(instr) || (instr->instr & 0xfff);
}

/* Returns true if the C flag is used as an input of the ALU */
static inline bool Decoder_LDRSTR_CarryIn(const Instruction *instr)
{
  /* RRX uses carry */
  if (Decoder_LDRSTR_ImmFlag(instr)) {
    return false;
  }
  /* Compares bits 5-11 only; bit 4 is not examined by this mask */
  return ((instr->instr & 0xfe0) == 0x060);
}

/* =0 if store */
static inline uint32_t Decoder_LDMSTM_LoadFlag(const Instruction *instr)
{
  return instr->instr & (1<<20);
}

/* !=0 if writeback */
static inline uint32_t Decoder_LDMSTM_WritebackFlag(const Instruction *instr)
{
  return instr->instr & (1<<21);
}

/* !=0 if ^ */
static inline uint32_t Decoder_LDMSTM_HatFlag(const Instruction *instr)
{
  return instr->instr & (1<<22);
}

/* !=0 if up */
static inline uint32_t Decoder_LDMSTM_UpFlag(const Instruction *instr)
{
  return instr->instr & (1<<23);
}

/* !=0 if pre-indexed */
static inline uint32_t Decoder_LDMSTM_PreFlag(const Instruction *instr)
{
  return instr->instr & (1<<24);
}

/* Defined elsewhere; presumably the number of registers in the LDM/STM
   register list and the cycle count of the transfer - confirm against the
   definitions */
extern int Decoder_LDMSTM_NumRegs(const Instruction *instr);
extern int Decoder_LDMSTM_Cycles(const Instruction *instr);

/* Register field accessors. Each returns a 4-bit register number taken from
   a fixed position in the instruction word; the Validity notes list the
   instruction classes for which the field is meaningful. */

/* Bits 16-19.
   Validity:
   DataProc && UsesRn
   Multiply && AccumFlag
   LDRSTR
   LDMSTM */
static inline uint32_t Decoder_Rn(const Instruction *instr)
{
  return (instr->instr >> 16) & 0xf;
}

/* Bits 12-15.
   Validity:
   DataProc && UsesRd
   Multiply
   LDRSTR */
static inline uint32_t Decoder_Rd(const Instruction *instr)
{
  return (instr->instr >> 12) & 0xf;
}

/* Bits 0-3.
   Validity:
   DataProc && !ImmFlag
   Multiply
   LDRSTR && !ImmFlag */
static inline uint32_t Decoder_Rm(const Instruction *instr)
{
  return instr->instr & 0xf;
}

/* Bits 8-11.
   Validity:
   DataProc && IsShiftedReg
   Multiply */
static inline uint32_t Decoder_Rs(const Instruction *instr)
{
  return (instr->instr >> 8) & 0xf;
}

#endif

--- NEW FILE: emuinterf.h ---
#ifndef JITEMUINTERF_HEADER
#define JITEMUINTERF_HEADER

/* Interface from the JIT to the main emulator
   Emulator-specific stuff goes here! */

#include "../c99.h"

/* Type used by emulator to store its state */
/* Only the struct tag is named here; the JIT refers to the state through this alias */
typedef struct ARMul_State JITEmuState;

/* Return pointer to global state object */
extern JITEmuState *JITEmuInterf_GetState(void);

#endif
------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot -- arcem-cvs mailing list arcem-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/arcem-cvs