op jit.t

Leopold Toetsch Mon, 25 Oct 2004 03:24:30 -0700

cvsuser     04/10/25 03:24:17


  Modified:    src      jit.c
               t/op     jit.t
  Log:
  more comments and details
  
  Revision  Changes    Path
  1.95      +146 -38   parrot/src/jit.c
  
  Index: jit.c
  ===================================================================
  RCS file: /cvs/public/parrot/src/jit.c,v
  retrieving revision 1.94
  retrieving revision 1.95
  diff -u -r1.94 -r1.95
  --- jit.c     24 Oct 2004 14:47:27 -0000      1.94
  +++ jit.c     25 Oct 2004 10:24:14 -0000      1.95
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -$Id: jit.c,v 1.94 2004/10/24 14:47:27 leo Exp $
  +$Id: jit.c,v 1.95 2004/10/25 10:24:14 leo Exp $
   
   =head1 NAME
   
  @@ -11,6 +11,9 @@
   JIT (Just In Time) compilation converts bytecode to native machine code
   instructions and executes the generated instruction sequence directly.
   
  +Actually it's not really just in time, it's just before this piece of code is
  +used and not per subroutine or even opcode, it works per bytecode segment.
  +
   =head2 Functions
   
   =over 4
  @@ -31,7 +34,7 @@
   
   extern int jit_op_count(void);
   /*
  - * s. jit/$arch/jit_emit.h for the meaning of these defs
  + * s. jit/$jitcpuarch/jit_emit.h for the meaning of these defs
    */
   
   #ifndef INT_REGISTERS_TO_MAP
  @@ -187,6 +190,12 @@
           int i, n;
           size_t rel_offset;
   
  +        /*
  +         * for all opcodes that are dynamically loaded, we can't have
  +         * a JITted version, so we execute the function incarnation
  +         * via the wrapper opcode, which just runs one opcode
  +         * see ops/core.ops for more
  +         */
           if (*cur_op >= jit_op_count())
              op = CORE_OPS_wrapper__;
   
  @@ -233,8 +242,19 @@
           if ((op_info->jump & PARROT_JUMP_ENEXT) ||
               (op_info->jump & PARROT_JUMP_GNEXT))
               branch[rel_offset + n] |= JIT_BRANCH_TARGET;
  -        if (op_info->jump & PARROT_JUMP_UNPREDICTABLE)
  +        if (op_info->jump & PARROT_JUMP_UNPREDICTABLE) {
  +            /*
  +             * TODO 
  +             *   this flag is currently not used or set
  +             *   and: if we have a branch that isn't going to a constant
  +             *   target like a calculated branch used by rx_ opcodes
  +             *   we are totally lost WRT register preservation.
  +             *   If we don't know, that the code is a branch target, inside
  +             *   a JITted code section, mapped registers might be
  +             *   not up to date WRT Parrot registers.
  +             */
               optimizer->has_unpredictable_jump = 1;
  +        }
           /* Move to the next opcode */
           cur_op += n;
       }
  @@ -278,11 +298,29 @@
               case PARROT_ARG_I:
               case PARROT_ARG_KI:
                   typ = 0;
  +                /*
  +                 * if the register number is negative, the register mapping
  +                 * was done by imcc/jit.c, which used negative numbers
  +                 * for allocated CPU registers. That's currently not
  +                 * functional because of changed register allocation
  +                 * strategy inside imcc.
  +                 * The code is still here and should probably be reactivated
  +                 * later, when things are stable: imcc has all the 
  +                 * necessary information like basic blocks and loop depth
  +                 * calculated already. A lot is duplicated here to regain this
  +                 * information.
  +                 */ 
                   if (idx < 0)
                       idx = -1 - idx;
                   break;
               case PARROT_ARG_P:
               case PARROT_ARG_K:
  +                /*
  +                 * P and S regs aren't currently used at all. That's not
  +                 * really optimal. If we have plenty of mappable registers
  +                 * and if we can call vtables or MMD functions directly
  +                 * we should finally allocate P and S regs too.
  +                 */
                   typ = 1;
                   break;
               case PARROT_ARG_S:
  @@ -395,7 +433,7 @@
   
           /*
            * End a section:
  -         * If this opcode is jitted and next calls a C function */
  +         * If this opcode is jitted and next is a C function */
           if (!EXTCALL(cur_op)) {
               cur_section->jit_op_count++;
   
  @@ -536,10 +574,9 @@
       /* Start from the first section */
       cur_section = optimizer->sections;
   
  -    /* While there is section */
       while (cur_section) {
           Parrot_jit_register_usage_t *ru = cur_section->ru;
  -        /* some up register usage for one block, don't change
  +        /* sum up register usage for one block, don't change
            * reg_dir. If allocation is done per section, block numbers
            * are different, so this is a nop
            */
  @@ -552,7 +589,7 @@
               next = next->next;
           }
   
  -        /* now sort registers by there usage count */
  +        /* now sort registers by their usage count */
           for (typ = 0; typ < 4; typ++) {
               /* find most used register */
               for (i = max_count = 0; i < NUM_REGISTERS; i++) {
  @@ -624,6 +661,40 @@
   Called by C<map_registers()> to actually assign the Parrot registers to
   hardware registers.
   
  +TODO
  +
  +Before actually assigning registers, we should optimize a bit:
  +
  +1) calculate max use count of register types for all sections 
  +
  +2) calculate costs for register preserving and restoring
  +   for two different allocation strategies:
  +
  +   a) allocate non-volatiles first
  +      overhead for jit_begin, jit_end:
  +      - 2 * max_used_non_volatile registers  
  +      overhead for register preserving around non-jitted sections:
  +      - only used IN arguments are saved
  +      - only OUT non-volatile arguments are restored
  +   b) allocate volatiles first
  +      no overhead for jit_begin, jit_end
  +      overhead per JITed op that calls a C function:
  +      - 2 * n_used_volatiles_to_preserve for each call
  +      overhead for register preserving around non-jitted sections:
  +      - all volatiles are saved and restored around non-jitted sections
  +
  +NB for all cost estimations size does matter: a 64bit double counts as
  +   two 32bit ints. Opcode count is assumed to be just one.
  +
  +3) depending on costs from 2) use one of the strategies
  +   That does still not account for any usage patterns. Imcc has loop
  +   nesting depth, but that's not available here. OTOH smaller code tends
  +   to perform better because of better cache usage.
  +
  +Usage analysis could show that a mixture of both strategies is best, e.g:
  +allocate 2-4 non-volatiles and the rest from volatiles. But that would
  +complicate the allocation code a bit.
  +
   =cut
   
   */
  @@ -742,7 +813,7 @@
       int i, typ;
       unsigned int j;
       const char *types = "IPSN";
  -    int t_l[] = {0,3};
  +    int types_to_list[] = {0,3};
   
       cur_section = optimizer->sections;
       while (cur_section) {
  @@ -776,9 +847,9 @@
           PIO_eprintf(interpreter, "\tend:\t%#p\t(%Ou)\n",
                   cur_section->end, *cur_section->end);
   
  -        for (j = 0; j < sizeof(t_l)/sizeof(int); j++) {
  +        for (j = 0; j < sizeof(types_to_list)/sizeof(int); j++) {
               char t;
  -            typ = t_l[j];
  +            typ = types_to_list[j];
               t = types[typ];
               PIO_eprintf(interpreter, "\t%c registers used:\t%i\n",
                       t, ru[typ].registers_used);
  @@ -814,7 +885,7 @@
   optimize_jit(Interp *interpreter, opcode_t *cur_op,
                opcode_t *code_start, opcode_t *code_end)>
   
  -Called by C<build_asm()> to initialize the optimizer.
  +Called by C<build_asm()> to run the optimizer.
   
   =cut
   
  @@ -836,7 +907,7 @@
       /* ok, let's loop again and generate the sections */
       make_sections(interpreter, optimizer, cur_op, code_start, code_end);
   
  -    /* look where a section jumps too */
  +    /* look where a section jumps to */
       make_branch_targets(interpreter, optimizer, code_start);
   
       /* This is where we start deciding which Parrot registers get
  @@ -994,13 +1065,13 @@
                             Interp * interpreter, int volatiles)>
   
   Load registers for the current section from parrot to processor registers.
  +If C<volatiles> is true, this code is used to restore these registers in
  +JITted code that calls out to Parrot.
   
   =cut
   
   */
   
  -
  -
   static void
   Parrot_jit_load_registers(Parrot_jit_info_t *jit_info,
                             Interp * interpreter, int volatiles)
  @@ -1012,6 +1083,7 @@
       void (*mov_f[4])(Interp *, int, int, size_t)
           = { Parrot_jit_emit_mov_rm_offs, 0, 0, Parrot_jit_emit_mov_rm_n_offs};
       size_t offs;
  +    int base_reg = 0;   /* -O3 warning */
   #else
       void (*mov_f[4])(Interp *, int, char *)
           = { Parrot_jit_emit_mov_rm, 0, 0, Parrot_jit_emit_mov_rm_n};
  @@ -1020,12 +1092,10 @@
       int lasts[] = { PRESERVED_INT_REGS, 0,0,  PRESERVED_FLOAT_REGS };
       char * maps[] = {0, 0, 0, 0};
       int first = 1;
  -    int base_reg = 0;   /* -O3 warning */
   
       maps[0] = jit_info->intval_map;
       maps[3] = jit_info->floatval_map;
   
  -
       for (typ = 0; typ < 4; typ++) {
           if (maps[typ]) {
               for (i = ru[typ].registers_used-1; i >= 0; --i) {
  @@ -1065,6 +1135,8 @@
                             Interp * interpreter, int volatiles)>
   
   Save registers for the current section.
  +If C<volatiles> is true, this code is used to preserve these registers in
  +JITted code that calls out to Parrot.
   
   =cut
   
  @@ -1081,6 +1153,7 @@
       void (*mov_f[4])(Interp * ,int, size_t, int)
           = { Parrot_jit_emit_mov_mr_offs, 0, 0, Parrot_jit_emit_mov_mr_n_offs};
       size_t offs;
  +    int base_reg = 0; /* -O3 warning */
   #else
       void (*mov_f[4])(Interp * , char *, int)
           = { Parrot_jit_emit_mov_mr, 0, 0, Parrot_jit_emit_mov_mr_n};
  @@ -1089,7 +1162,6 @@
       int lasts[] = { PRESERVED_INT_REGS, 0,0,  PRESERVED_FLOAT_REGS };
       char * maps[] = {0, 0, 0, 0};
       int first = 1;
  -    int base_reg = 0; /* -O3 warning */
   
       maps[0] = jit_info->intval_map;
       maps[3] = jit_info->floatval_map;
  @@ -1181,6 +1253,15 @@
   native code, as well as by the native code itself to convert bytecode
   program counters values to hardware program counter values.
   
  +Finally this code here is used to generate native executables (or better
  +object files that are linked to executables), if EXEC_CAPABLE is defined.
  +This functionality is triggered by 
  +
  +  parrot -o foo.o foo.imc
  +
  +which uses the JIT engine to translate to native code inside the object
  +file.
  +
   =cut
   
   */
  @@ -1203,7 +1284,6 @@
       Parrot_exec_objfile_t *obj = (Parrot_exec_objfile_t *)objfile;
   #endif
   
  -
       /* XXX assume, we restart */
       if (interpreter->jit_info) {
           jit_info = interpreter->jit_info;
  @@ -1223,12 +1303,16 @@
   #endif
           op_func = op_jit;
   
  -
  +    /*
  +     * check if IMCC did all he work. If yes, we got a PF segment with
  +     * register allocation information inside.
  +     * See imcc/jit.c for more
  +     */
       name = mem_sys_allocate(strlen(interpreter->code->cur_cs->base.name) + 5);
       sprintf(name, "%s_JIT", interpreter->code->cur_cs->base.name);
       jit_seg = PackFile_find_segment(interpreter->code->cur_cs->base.dir,
               name, 0);
  -    free(name);
  +    mem_sys_free(name);
       if (jit_seg)
           jit_info->optimizer =
               optimize_imcc_jit(interpreter, pc, code_start, code_end, jit_seg);
  @@ -1240,7 +1324,6 @@
       jit_info->intval_map = intval_map;
       jit_info->floatval_map = floatval_map;
   
  -
       /* Byte code size in opcode_t's */
       jit_info->arena.map_size = (code_end - code_start) + 1;
       jit_info->arena.op_map =
  @@ -1268,16 +1351,22 @@
       jit_info->arena.fixups = NULL;
   
       /*
  -    *   op_map holds the offset from arena.start
  -    *   of the parrot op at the given opcode index
  -     *
  -    *  bytecode:       56  1   1   56  1   1
  -    *  op_map:         3   0   0   15  0   0
  -    */
  -
  +     * from C's ABI all the emitted code here is one (probably big)
  +     * function. So we have to generate an appropriate function
  +     * prolog, that makes all this look like a normal fuction ;)
  +     */
       Parrot_jit_begin(jit_info, interpreter);
  +    /*
  +     * the function epilog can basically be anywhere, that's done
  +     * by the Parrot_end opcode somewhere in core.jit
  +     */
   
  -    /* Set the offset of the first opcode */
  +    /*
  +     *   op_map holds the offset from arena.start
  +     *   of the parrot op at the given opcode index
  +     *
  +     * Set the offset of the first opcode 
  +     */
       jit_info->arena.op_map[jit_info->op_i].offset =
           jit_info->native_ptr - jit_info->arena.start;
   
  @@ -1330,7 +1419,14 @@
                * really checking if the target section has the same registers
                * mapped too.
                *
  -             * and also, if we have a jitted sections and encounter
  +             * Yep so: TODO
  +             * during register allocation try to use the same registers, if
  +             * its a loop or a plain branch and if register usage doesn't
  +             * differ too much. This could save a lot of register reloads.
  +             *
  +             * --
  +             *  
  +             * save also, if we have a jitted sections and encounter
                * an "end" opcode, e.g. in evaled code
                */
               if ((((map[cur_op - code_start] == JIT_BRANCH_SOURCE) &&
  @@ -1343,7 +1439,9 @@
               else if (CALLS_C_CODE(cur_opcode_byte)) {
                   /*
                    * a JITted function with a function call, we have to
  -                 * save volatile registers
  +                 * save volatile registers but 
  +                 * TODO not if the previous opcode was also one
  +                 *      that called C code
                    */
                   Parrot_jit_save_registers(jit_info, interpreter, 1);
               }
  @@ -1423,7 +1521,15 @@
       PIO_eprintf(interpreter, "\nTotal size %u bytes\n",
               (unsigned int)(jit_info->native_ptr - jit_info->arena.start));
   #endif
  -
  +    
  +/*
  + * TODO just call a sync function, which the architecture defines
  + *      or not if not
  + *
  + *      This should be generalized and go along with the executable
  + *      allocation functions as e.g. mem_close_executable()
  + */
  + 
   #ifdef PARROT_ARM
       arm_sync_d_i_cache(jit_info->arena.start, jit_info->native_ptr);
   #endif
  @@ -1441,8 +1547,13 @@
   
       /* assume gdb is available: generate symbol information  */
   #if defined __GNUC__ || defined __IBMC__
  -    if (Interp_flags_TEST(interpreter, PARROT_DEBUG_FLAG))
  +    if (Interp_flags_TEST(interpreter, PARROT_DEBUG_FLAG)) {
  +        /*
  +         * TODO same like above here e.g. create ASM listing of code
  +         *      if real debug support isn't available
  +         */
           Parrot_jit_debug(interpreter);
  +    }
   #endif
   
       return (jit_f)D2FPTR(jit_info->arena.start);
  @@ -1466,10 +1577,6 @@
   
       fixup = mem_sys_allocate_zeroed(sizeof(*fixup));
   
  -    if (!fixup)
  -        internal_exception(ALLOCATION_ERROR,
  -                           "System memory allocation failed\n");
  -
       /* Insert fixup at the head of the list */
       fixup->next = jit_info->arena.fixups;
       jit_info->arena.fixups = fixup;
  @@ -1485,7 +1592,8 @@
   
   =head1 SEE ALSO
   
  -F<include/parrot/jit.h>, F<docs/jit.pod> and F<src/jit_debug.c>.
  +F<include/parrot/jit.h>, F<docs/jit.pod>,d F<src/jit_debug.c>,
  +F<jit/$jitcpuarch/jit_emit.h>, F<jit/$jitcpuarch/core.jit>. 
   
   =cut
   
  
  
  
  1.11      +8 -2      parrot/t/op/jit.t
  
  Index: jit.t
  ===================================================================
  RCS file: /cvs/public/parrot/t/op/jit.t,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- jit.t     23 Oct 2004 13:21:39 -0000      1.10
  +++ jit.t     25 Oct 2004 10:24:17 -0000      1.11
  @@ -1,6 +1,6 @@
   #! perl -w
   # Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -# $Id: jit.t,v 1.10 2004/10/23 13:21:39 leo Exp $
  +# $Id: jit.t,v 1.11 2004/10/25 10:24:17 leo Exp $
   
   =head1 NAME
   
  @@ -12,7 +12,13 @@
   
   =head1 DESCRIPTION
   
  -Tests JIT register allocation.
  +Tests JIT register allocation. The tests are written for 4 mapped
  +registers per kind, i.e. the crippled x86 architecture. If you are 
  +experimenting with register allocation please just use settings like
  +in jit/i386/jit_emit, i.e. 4 mapped regs, 2 volatile ints ...
  +
  +These tests are of course usable for other run cores too, the results
  +should just be the same.
   
   =cut

cvs commit: parrot/t/op jit.t

Reply via email to