simon       01/09/13 06:42:42

  Modified:    .        assemble.pl
  Log:
  A new assembler, from Brian Wheeler.
  
  Revision  Changes    Path
  1.11      +172 -104  parrot/assemble.pl
  
  Index: assemble.pl
  ===================================================================
  RCS file: /home/perlcvs/parrot/assemble.pl,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -w -r1.10 -r1.11
  --- assemble.pl       2001/09/13 07:27:46     1.10
  +++ assemble.pl       2001/09/13 13:42:41     1.11
  @@ -1,38 +1,42 @@
   #! /usr/bin/perl -w
   #
   # assemble.pl - take a parrot assembly file and spit out a bytecode file
  +#   This is based heavily on assemble.pl by Dan Sugalski
  +# Brian Wheeler ([EMAIL PROTECTED])
   
   use strict;
   
  -my(%opcodes, %labels);
  -my ($output, $opt_c);
  +my $opt_c;
   if (@ARGV and $ARGV[0] eq "-c") {
       shift @ARGV;
       $opt_c = 1;
   }
   
  -my %pack_type;
  -%pack_type = (i => 'l',
  -           n => 'd',
  -       );
  -
  -my %real_type=('i'=>'l',
  -              'n'=>'d',
  -              'N'=>'l',
  -              'I'=>'l',
  -              'S'=>'l',
  -              's'=>'l',
  -              'D'=>'l');
   
  -my $sizeof_packi = length(pack($pack_type{i},1024));
  +# define data types
  +my(%pack_type)=('i'=>'l','n'=>'d');
  +my(%real_type)=('I'=>'i','i'=>'i',
  +                'N'=>'i','n'=>'n',
  +                'S'=>'i','s'=>'i',
  +                'D'=>'i');
   
  +# compute sizes
  +my(%sizeof);
  +foreach (keys(%real_type)) {
  +    $sizeof{$_}=length(pack($pack_type{$real_type{$_}},0));
  +}
  +                
  +
  +# get opcodes from guts.
   open GUTS, "interp_guts.h";
  -my $opcode;
  +my %opcodes;
   while (<GUTS>) {
  -    next unless /\tx\[(\d+)\] = ([a-z0-9_]+);/;
  +    next unless /\tx\[(\d+)\] = ([a-z_]+);/;
       $opcodes{$2}{CODE} = $1;
   }
  +close GUTS;
   
  +# get opcodes and their arg lists
   open OPCODES, "<opcode_table" or die "Can't get opcode table, $!/$^E";
   while (<OPCODES>) {
       next if /^\s*#/;
  @@ -46,110 +50,174 @@
       $opcodes{$name}{TYPES} = [@types];
       $opcodes{$name}{RTYPES}=[@rtypes];
   }
  +close OPCODES;
   
  -my $pc = 0;
  -my @code;
  -my %constants;
  -my @constants;
  -
  -# First scan for labels and strings
  -while (<>) {
  -    next if /^\s?#/;
  -    s/^\s*//;
  -    if (s/^\s*([a-zA-Z_]\w+):\s*//) { $labels{$1} = $pc; }
  -    1 while s/\"([^\\\"]*(?:\\.[^\\\"]*)*)\"/constantize($1)/eg;
  -    my ($opcode, @args) = split /\s+/, $_;
  -    push @code, $_;
  -    $pc += 1+@args;
  -}
   
  -emit_magic();
  -emit_fixup_section();
  -emit_constants_section();
  -
  -# Now assemble
  -$pc = 0;
  +# read source and assemble
  +my $pc=0; my $op_pc=0;
  +my ($bytecode,%label,%fixup,%constants,@constants);
   my $line = 0;
  -foreach (@code) {
  +while(<>) {
       $line++;
       chomp;
  -    next if(m/^\s*$/); # blank lines
  -    s/,/ /g;
  -    
  -    my ($opcode, @args) = split /\s+/, $_;
  -
  -    if (!exists $opcodes{lc $opcode}) {
  -     die "No opcode $opcode at line $line:\n  <$_>\n";
  +    s/^\s*//;
  +    s/\s*$//;
  +    next if(/^\#/ || $_ eq "");
  +    if(m/^((\S+):)?\s*(.+)?/) {
  +        my($label,$code)=($2,$3);
  +        if(defined($label) && $label ne "") {
  +            if(exists($label{$label})) {
  +                error("'$label' already defined!");
  +            }
  +            if(exists($fixup{$label})) {
  +                # backpatch everything with this PC.
  +                while(scalar(@{$fixup{$label}})) {
  +                    my $op_pc=shift(@{$fixup{$label}});
  +                    my $offset=shift(@{$fixup{$label}});
  +                    substr($bytecode,$offset,4)=pack('l',($pc-$op_pc)/4);
  +                }
  +                delete($fixup{$label});  
  +            }
  +            $label{$label}=$pc; # store it.
  +        }
  +        next if(!defined($code));
  +        1 while $code=~s/\"([^\\\"]*(?:\\.[^\\\"]*)*)\"/constantize($1)/eg;
  +        $code=~s/,/ /g;
  +        my($opcode,@args)=split(/\s+/,$code);
  +     $opcode=lc($opcode);
  +        if (!exists $opcodes{$opcode}) {
  +         # try to determine _real_ opcode.
  +         my @arg_t=();
  +         foreach (@args) {
  +             if(m/^([INPS])\d+$/) {
  +                 # a register.
  +                 push @arg_t,lc($1);
  +             } elsif(m/^\d+$/) {
  +                 # a constant of some sort
  +                 push @arg_t,'(ic|nc|sc)';
  +             } else {
  +                 # a label
  +                 push @arg_t,'ic';
       }
  +         }
  +         my $test;
  +         my($first,$last)=($arg_t[0],$arg_t[-1]);
  +         if($first ne $last) {
  +             $test="$opcode\_$first\_$last";
  +         } else {
  +             $test="$opcode\_$first";
  +         }
  +         my($found_op)=0;
  +         foreach my $op (grep($_=~/^$opcode/,keys(%opcodes))) {
  +             if($op=~/$test/) {
  +                 $opcode=$op;
  +                 $found_op=1;
  +                 last;
  +             }
  +         }
  +            error("No opcode $opcode in <$_>") if(!$found_op);
  +        }
       if (@args != $opcodes{$opcode}{ARGS}) {
  -     die "wrong arg count--got ". scalar @args. " needed " . 
$opcodes{$opcode}{ARGS};
  +            error("Wrong arg count--got ".scalar(@args)." needed
  +".$opcodes{$opcode}{ARGS});
       }
  -    $output .= pack "l", $opcodes{$opcode}{CODE};
  +        $bytecode .= pack "l", $opcodes{$opcode}{CODE};
  +        $op_pc=$pc;
  +        $pc+=4;
  +
       foreach (0..$#args) {
          my($rtype)=$opcodes{$opcode}{RTYPES}[$_];
          my($type)=$opcodes{$opcode}{TYPES}[$_];
  -       if($rtype eq "I" || $rtype eq "N" || $rtype eq "P" || $rtype eq "S") {
  +            if($rtype eq "I" || $rtype eq "N" || $rtype eq "P" || $rtype eq
  +"S") {
              # its a register argument
              $args[$_]=~s/^[INPS](\d+)$/$1/i;
  +                $pc+=$sizeof{$rtype}
          } elsif($rtype eq "D") {
              # a destination
  -           $args[$_]=fixup($args[$_]);
  +                if(!exists($label{$args[$_]})) {
  +                    # we have not seen it yet...put it on the fixup list
  +                    push(@{$fixup{$args[$_]}},$op_pc,$pc);
  +                    $args[$_]=0xffffffff;
          } else {
  +                    $args[$_]=($label{$args[$_]}-$op_pc)/4;
  +                }
  +                $pc+=$sizeof{$rtype};
  +            } else {
           $args[$_]=oct($args[$_]) if($args[$_]=~/^0/);
  +                $pc+=$sizeof{$rtype};           
          }
  -       $output .= pack $type, $args[$_];
  +            $bytecode .= pack $type, $args[$_];
       }
  -    $pc += 1+@args;
   }
  -
  -print $output unless (defined $opt_c and $opt_c);
  -
  -sub fixup {
  -    my $l = shift;
  -    die "Unknown label $l" unless exists $labels{$l};
  -    return $labels{$l} - $pc;
   }
   
  -sub constantize {
  -    my $s = shift;
  -    return $constants{$s} if exists $constants{$s};
  -    push @constants, $s;
  -    return $constants{$s} = $#constants;
  -}
  +my $output;
   
  -sub emit_magic { $output .= pack($pack_type{i}, 0x13155a1) }
  +# build file in memory
   
  -# Dummy for now.
  -sub emit_fixup_section { $output .= pack($pack_type{i}, 0) }
  +# MAGIC COOKIE
  +$output=pack($pack_type{i},0x13155a1);
   
  -sub emit_constants_section {
  -    # First, compute how big it's going to be.
  -    # The fields we'll need to fill in are: strlen, flags, encoding, type
  -    my $size =0 ;
  -    for (@constants) {
  -        $size += 4*$sizeof_packi;
  -        $size += length($_);
  -     my($pad)=length($_) % $sizeof_packi;
  -     if($pad) {
  -         $size+=$sizeof_packi-$pad;
  +
  +# FIXUP
  +if(keys(%fixup)) {
  +    print STDERR "SQUAWK!  These symbols were referenced but not
  +defined:\n";
  +    foreach (sort(keys(%fixup))) {
  +        print STDERR "\t$_ at pc: ";
  +        foreach my $pc (@{$fixup{$_}}) {
  +            print STDERR sprintf("%08x ",$pc);
        }
  +        print STDERR "\n";
       }
  -
  -    $size += $sizeof_packi if @constants; # That's for the number of constants
  -    $output .= pack($pack_type{i}, $size);
  -    return unless @constants; # Zero means end of segment.
  +    exit;
  +} else {
  +    # dump empty header
  +    $output.=pack($pack_type{i},0);
  +}
   
  +# CONSTANTS
  +if(@constants) {
  +    my($const);
       # Then spit out how many constants there are, so we can allocate
  -    $output .= pack($pack_type{i}, scalar @constants);
  +    $const .= pack($pack_type{i}, scalar @constants);
   
       # Now emit each constant
       for (@constants) {
  -        $output .= pack($pack_type{i},0) x 3; # Flags, encoding, type
  -        $output .= pack($pack_type{i},length($_)); # Strlen followed by that many 
bytes.
  -        $output .= $_;
  -     my $pad=(length($_) % $sizeof_packi);
  +        $const .= pack($pack_type{i},0) x 3; # Flags, encoding, type
  +        $const .= pack($pack_type{i},length($_)); # Strlen followed by that many 
bytes.
  +        $const .= $_;
  +        my $pad=(length($_) % $sizeof{i});
        if($pad) {
  -         $output .= "\0" x ($sizeof_packi-(length($_) % $sizeof_packi)); # Padding;
  +            $const .= "\0" x ($sizeof{i}-(length($_) % $sizeof{i})); # Padding;
          }
       }
  +
  +    $output.=pack($pack_type{i},length($const));
  +    $output.=$const;
  +} else {
  +    # no constants, dump empty header.
  +    $output.=pack($pack_type{i},0);
  +}
  +
  +## BYTECODE
  +$output.=$bytecode;
  +
  +if(!$opt_c) {
  +    print $output;
  +}
  +
  +sub error {
  +    my($message)=@_;
  +    print STDERR "Error ($line): $message\n";
  +    exit;
  +}
  +
  +
  +sub constantize {
  +    my $s = shift;
  +    return $constants{$s} if exists $constants{$s};
  +    push @constants, $s;
  +    return $constants{$s} = $#constants;
   }
  
  
  

Reply via email to