On 2014-03-12 13:24:42 +0200, Martin Storsjö wrote:
> This syntax is supported by the official arm tools and
> by Microsoft's assembler.
> 
> This currently only supports microsoft's assembler, the
> armasm assembler in RVCT requires a few more tweaks to
> be able to build libav.
> 
> The preprocessing is done by invoking cpp (do we need to
> be able to override this?).
> 
> The converted output is written to a file instead of using
> a pipe, since Microsoft's armasm can't read the input from
> a pipe.

Does the name still make sense? I guess it preprocesses
modern gas asm files so that other assemblers can use them.

If anyone has a good idea how the support for different
target assemblers could be made more modular, without requiring
all the ifs scattered through the source, it would be welcome.

I looked only lightly at the armasm-specific parts; they look ok-ish
aside from my comments below.

Janne

> ---
>  gas-preprocessor.pl | 274 
> ++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 245 insertions(+), 29 deletions(-)
> 
> diff --git a/gas-preprocessor.pl b/gas-preprocessor.pl
> index a8f119a..50b5de9 100755
> --- a/gas-preprocessor.pl
> +++ b/gas-preprocessor.pl
> @@ -40,7 +40,7 @@ command. Following options are currently supported:
>  
>      -help         - this usage text
>      -arch         - target architecture
> -    -as-type      - one value out of {,apple-}{gas,clang}
> +    -as-type      - one value out of {{,apple-}{gas,clang},armasm}
>      -fix-unreq
>      -no-fix-unreq
>  ";
> @@ -79,7 +79,7 @@ while (@options) {
>          die "unkown arch: '$arch'\n" if not exists $comments{$arch};
>      } elsif ($opt eq "-as-type") {
>          $as_type = shift @options;
> -        die "unkown as type: '$as_type'\n" if $as_type !~ 
> /^(apple-)?(gas|clang)$/;
> +        die "unkown as type: '$as_type'\n" if $as_type !~ 
> /^((apple-)?(gas|clang)|armasm)$/;
>      } elsif ($opt eq "-help") {
>          usage();
>          exit 0;
> @@ -103,6 +103,25 @@ if (grep /\.c$/, @gcc_cmd) {
>  } else {
>      die "Unrecognized input filetype";
>  }
> +if ($as_type eq "armasm") {
> +
> +    $preprocess_c_cmd[0] = "cpp";
> +
> +    @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
> +    # Remove -ignore XX parameter pairs from preprocess_c_cmd
> +    my $index = 1;
> +    while ($index < $#preprocess_c_cmd) {
> +        if ($preprocess_c_cmd[$index] eq "-ignore" and $index + 1 < 
> $#preprocess_c_cmd) {
> +            splice(@preprocess_c_cmd, $index, 2);
> +            next;
> +        }
> +        $index++;
> +    }
> +    if (grep /^-MM$/, @preprocess_c_cmd) {
> +        system(@preprocess_c_cmd) == 0 or die "Error running preprocessor";
> +        exit 0;
> +    }
> +}
>  
>  # if compiling, avoid creating an output file named '-.o'
>  if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
> @@ -116,8 +135,27 @@ if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
>          }
>      }
>  }
> -@gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
>  @preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd;
> +my $tempfile;
> +if ($as_type ne "armasm") {
> +    @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
> +} else {
> +    @preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd;
> +    @preprocess_c_cmd = grep ! /^-m/, @preprocess_c_cmd;
> +
> +    my @outfiles = grep /\.o$/, @gcc_cmd;
> +    $tempfile = $outfiles[0].".asm";
> +
> +    # Remove most parameters from gcc_cmd, which actually is the armasm 
> command,
> +    # which doesn't support any of the common compiler/preprocessor options.
> +    @gcc_cmd = grep ! /^-D/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-U/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-m/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-M/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-c$/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-I/, @gcc_cmd;
> +    @gcc_cmd = map { /\.S$/ ? $tempfile : $_ } @gcc_cmd;
> +}
>  
>  # detect architecture from gcc binary name
>  if (!$arch) {
> @@ -167,23 +205,53 @@ my %symbols;
>  while (<ASMFILE>) {
>      # remove all comments (to avoid interfering with evaluating directives)
>      s/(?<!\\)$comm.*//x;
> +    # Strip out windows linefeeds
> +    s/\r$//;
> +    # Strip out line number comments - armasm can handle them in a separate
> +    # syntax, but since the line numbers are off they are only misleading.
> +    s/^#\s+(\d+).*//          if $as_type =~ /armasm/;
>  
>      # comment out unsupported directives
> -    s/\.type/$comm$&/x        if $as_type =~ /^apple-/;
> +    s/\.type/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
>      s/\.func/$comm$&/x        if $as_type =~ /^(apple-|clang)/;
>      s/\.endfunc/$comm$&/x     if $as_type =~ /^(apple-|clang)/;
> -    s/\.ltorg/$comm$&/x       if $as_type =~ /^(apple-|clang)/;
> -    s/\.size/$comm$&/x        if $as_type =~ /^apple-/;
> -    s/\.fpu/$comm$&/x         if $as_type =~ /^apple-/;
> -    s/\.arch/$comm$&/x        if $as_type =~ /^(apple-|clang)/;
> -    s/\.object_arch/$comm$&/x if $as_type =~ /^apple-/;
> -
> -    # the syntax for these is a little different
> -    s/\.global/.globl/x       if $as_type =~ /apple-/;
> -    # also catch .section .rodata since the equivalent to .const_data is 
> .section __DATA,__const
> -    s/(.*)\.rodata/.const_data/x if $as_type =~ /apple-/;
> -    s/\.int/.long/x;
> -    s/\.float/.single/x;
> +    s/\.endfunc/ENDP/x        if $as_type =~ /armasm/;
> +    s/\.ltorg/$comm$&/x       if $as_type =~ /^(apple-|clang|armasm)/;
> +    s/\.size/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
> +    s/\.fpu/$comm$&/x         if $as_type =~ /^(apple-|armasm)/;
> +    s/\.arch/$comm$&/x        if $as_type =~ /^(apple-|clang|armasm)/;
> +    s/\.object_arch/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
> +
> +    s/\.syntax/$comm$&/x      if $as_type =~ /armasm/;
> +    s/\.thumb/THUMB/x         if $as_type =~ /armasm/;
> +    s/\.arm/ARM/x             if $as_type =~ /armasm/;

We use .arm|.thumb in the 2nd pass to set $thumb, and that check is not
modified here even though these directives are now rewritten to ARM/THUMB.
Does thumb work with armasm?

Also, if you have ideas on how the as_type based commenting could be
improved, they would be welcome. I wasn't really happy with it when I
wrote it.

> +    # armasm uses a different comment character. We don't want to change
> +    # $comm originally since that matches what the input source uses.
> +    s/$comm/;/                if $as_type =~ /armasm/;
> +
> +    if ($as_type ne "armasm") {

I think this could be $as_type =~ /apple-/. There is no need to replace .int
and .float with modern gas and upstream clang; I didn't make them conditional
since both support .long/.single too.

> +        # the syntax for these is a little different
> +        s/\.global/.globl/x       if $as_type =~ /apple-/;
> +        # also catch .section .rodata since the equivalent to .const_data is 
> .section __DATA,__const
> +        s/(.*)\.rodata/.const_data/x if $as_type =~ /apple-/;
> +        s/\.int/.long/x;
> +        s/\.float/.single/x;
> +    } else {
> +        s/\.global/EXPORT/x;
> +        s/\.int/dcd/x;
> +        s/\.long/dcd/x;
> +        s/\.float/dcfs/x;
> +        s/\.word/dcd/x;
> +        s/\.short/dcw/x;
> +        s/\.byte/dcb/x;
> +        # The alignment in AREA is the power of two, just as .align in gas
> +        s/\.text/AREA |.text|, CODE, READONLY, ALIGN=2, CODEALIGN/;
> +        s/(.*)\.rodata/AREA |.rodata|, DATA, READONLY, ALIGN=5/;
> +
> +        s/fmxr/vmsr/;
> +        s/fmrx/vmrs/;
> +        s/fadds/vadd/;
> +    }
>  
>      # catch unknown section names that aren't mach-o style (with a comma)
>      if ($as_type =~ /apple-/ and /.section ([^,]*)$/) {
> @@ -327,7 +395,9 @@ sub handle_set {
>      my $line = $_[0];
>      if ($line =~ /\.set\s+(.*),\s*(.*)/) {
>          $symbols{$1} = eval_expr($2);
> +        return 1;
>      }
> +    return 0;
>  }
>  
>  sub expand_macros {
> @@ -450,7 +520,11 @@ close(ASMFILE) or exit 1;
>  if ($ENV{GASPP_DEBUG}) {
>      open(ASMFILE, ">&STDOUT");
>  } else {
> -    open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
> +    if ($as_type ne "armasm") {
> +        open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
> +    } else {
> +        open(ASMFILE, ">", $tempfile);
> +    }
>  }
>  
>  my @sections;
> @@ -466,6 +540,7 @@ my $thumb = 0;
>  
>  my %thumb_labels;
>  my %call_targets;
> +my %mov32_targets;
>  
>  my @irp_args;
>  my $irp_param;
> @@ -473,6 +548,12 @@ my $irp_param;
>  my %neon_alias_reg;
>  my %neon_alias_type;
>  
> +my $temp_label_next = 0;
> +my %last_temp_labels;
> +my %next_temp_labels;
> +
> +my %labels_seen;
> +
>  my %aarch64_req_alias;
>  
>  # pass 2: parse .rept and .if variants
> @@ -492,7 +573,7 @@ foreach my $line (@pass1_lines) {
>      $thumb = 0 if $line =~ /\.code\s+32|\.arm/;
>  
>      # handle ldr <reg>, =<expr>
> -    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
> +    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/ and $as_type ne 
> "armasm") {
>          my $label = $literal_labels{$3};
>          if (!$label) {
>              $label = "Literal_$literal_num";
> @@ -500,7 +581,7 @@ foreach my $line (@pass1_lines) {
>              $literal_labels{$3} = $label;
>          }
>          $line = "$1 ldr$2, $label\n";
> -    } elsif ($line =~ /\.ltorg/) {
> +    } elsif ($line =~ /\.ltorg/ and $as_type ne "armasm") {
>          $line .= ".align 2\n";
>          foreach my $literal (keys %literal_labels) {
>              $line .= "$literal_labels{$literal}:\n $literal_expr $literal\n";
> @@ -533,7 +614,7 @@ foreach my $line (@pass1_lines) {
>      }
>  
>      if ($line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.globl)\s+(\w+)/ and
> -     $as_type ne "gas") {
> +     $as_type !~ "gas|armasm") {

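This also excludes apple-gas, since the unanchored pattern matches "gas"
anywhere in $as_type (the old check was an exact comparison against "gas").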

>          my $cond = $3;
>          my $label = $4;
>          # Don't interpret e.g. bic as b<cc> with ic as conditional code
> @@ -627,7 +708,8 @@ sub handle_serialized_line {
>          return if handle_if($line);
>      }
>  
> -    handle_set($line);
> +    # Strip out the .set lines from the armasm output
> +    return if (handle_set($line) and $as_type eq "armasm");
>  
>      if ($line =~ /\.unreq\s+(.*)/) {
>          if (defined $neon_alias_reg{$1}) {
> @@ -668,7 +750,7 @@ sub handle_serialized_line {
>          }
>      }
>  
> -    if ($arch eq "aarch64") {
> +    if ($arch eq "aarch64" or $as_type eq "armasm") {
>          # clang's integrated aarch64 assembler in Xcode 5 does not support 
> .req/.unreq
>          if ($line =~ /\b(\w+)\s+\.req\s+(\w+)\b/) {
>              $aarch64_req_alias{$1} = $2;
> @@ -683,6 +765,8 @@ sub handle_serialized_line {
>              }
>              $line =~ s/\b$alias\b/$resolved/g;
>          }
> +    }
> +    if ($arch eq "aarch64") {
>          # fix missing aarch64 instructions in Xcode 5.1 (beta3)
>          # mov with vector arguments is not supported, use alias orr instead
>          if ($line =~ 
> /^\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
> @@ -708,17 +792,149 @@ sub handle_serialized_line {
>          }
>      }
>  
> +    if ($as_type eq "armasm") {
> +        # Also replace variables set by .set
> +        foreach (keys %symbols) {
> +            my $sym = $_;
> +            $line =~ s/\b$sym\b/$symbols{$sym}/g;
> +        }
> +
> +        # Handle function declarations and keep track of the declared labels
> +        if ($line =~ s/^\s*\.func\s+(\w+)/$1 PROC/) {
> +            $labels_seen{$1} = 1;
> +        }
> +
> +        if ($line =~ s/^(\d+)://) {
> +            # Convert local labels into unique labels. armasm (at least in
> +            # RVCT) has something similar, but still different enough.
> +            # By converting to unique labels we avoid any possible
> +            # incompatibilities.
> +
> +            my $num = $1;
> +            foreach (@{$next_temp_labels{$num}}) {
> +                $line = "$_\n" . $line;
> +            }
> +            @next_temp_labels{$num} = ();
> +            my $name = "temp_label_$temp_label_next";
> +            $temp_label_next++;
> +            # The matching regexp above removes the label from the start of
> +            # the line (which might contain an instruction as well), readd
> +            # it on a separate line above it.
> +            $line = "$name:\n" . $line;
> +            $last_temp_labels{$num} = $name;
> +        }
> +
> +        if ($line =~ s/^(\w+):/$1/) {
> +            # Skip labels that have already been declared with a PROC,
> +            # labels must not be declared multiple times.
> +            return if (defined $labels_seen{$1});
> +            $labels_seen{$1} = 1;
> +        } elsif ($line !~ /(\w+) PROC/) {
> +            # If not a label, make sure the line starts with whitespace,
> +            # otherwise ms armasm interprets it incorrectly.
> +            $line =~ s/^[\.\w]/\t$&/;
> +        }
> +
> +
> +        # Check branch instructions
> +        if ($line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?)\s+(\w+)/) {
> +            my $cond = $3;
> +            my $target = $4;
> +            # Don't interpret e.g. bic as b<cc> with ic as conditional code
> +            if ($cond !~ 
> /|eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|hs|lo/) {
> +                # Not actually a branch
> +            } elsif ($target =~ /(\d+)([bf])/) {
> +                # The target is a local label
> +                my $num = $1;
> +                my $dir = $2;
> +                if ($dir eq "b") {
> +                    $line =~ s/$target/$last_temp_labels{$num}/;
> +                } else {
> +                    my $name = "temp_label_$temp_label_next";
> +                    $temp_label_next++;
> +                    push(@{$next_temp_labels{$num}}, $name);
> +                    $line =~ s/$target/$name/;
> +                }
> +            } elsif ($target ne "lr" and
> +                     $target ne "ip" and
> +                     $target !~ /^[rav]\d+$/) {
> +                $call_targets{$target}++;
> +            }
> +        }
> +
> +        # ALIGN in armasm syntax is the actual number of bytes
> +        if ($line =~ /\.align\s+(\d+)/) {
> +            my $align = 1 << $1;
> +            $line =~ s/\.align\s(\d+)/ALIGN $align/;
> +        }
> +        # Convert gas style [r0, :128] into armasm [r0@128] alignment 
> specification
> +        $line =~ s/\[([^\[]+),\s*:(\d+)\]/[$1\@$2]/g;
> +
> +        # armasm treats logical values {TRUE} and {FALSE} separately from
> +        # numeric values - logical operators and values can't be intermixed
> +        # with numerical values. Evaluate !<number> and (a <> b) into 
> numbers,
> +        # let the assembler evaluate the rest of the expressions. This 
> current
> +        # only works for cases when ! and <> are used with actual constant 
> numbers,
> +        # we don't evaluate subexpressions here.
> +
> +        # Evaluate !<number>
> +        while ($line =~ /!\s*(\d+)/g) {
> +            my $val = ($1 != 0) ? 0 : 1;
> +            $line =~ s/!(\d+)/$val/;
> +        }
> +        # Evaluate (a > b)
> +        while ($line =~ /\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/) {
> +            my $val;
> +            if ($2 eq "<") {
> +                $val = ($1 < $3) ? 1 : 0;
> +            } else {
> +                $val = ($1 > $3) ? 1 : 0;
> +            }
> +            $line =~ s/\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/$val/;
> +        }
> +
> +        # Change a movw... #:lower16: into a mov32 pseudoinstruction
> +        $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
> +        # and remove the following, matching movt completely
> +        $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;
> +
> +        if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
> +            $mov32_targets{$1}++;
> +        }
> +
> +        # Misc bugs/deficiencies:
> +        # armasm seems unable to parse e.g. "vmov s0, s1" without a type
> +        # qualifier, thus add .f32.
> +        $line =~ s/^(\s+(?:vmov|vadd))(\s+s)/$1.f32$2/;
> +        # armasm is unable to parse &0x - add spacing
> +        $line =~ s/&0x/& 0x/g;
> +    }
> +
>      print ASMFILE $line;
>  }
>  
> -print ASMFILE ".text\n";
> -print ASMFILE ".align 2\n";
> -foreach my $literal (keys %literal_labels) {
> -    print ASMFILE "$literal_labels{$literal}:\n $literal_expr $literal\n";
> -}
> +if ($as_type ne "armasm") {
> +    print ASMFILE ".text\n";
> +    print ASMFILE ".align 2\n";
> +    foreach my $literal (keys %literal_labels) {
> +        print ASMFILE "$literal_labels{$literal}:\n $literal_expr 
> $literal\n";
> +    }
> +
> +    map print(ASMFILE ".thumb_func $_\n"),
> +        grep exists $thumb_labels{$_}, keys %call_targets;
> +} else {
> +    map print(ASMFILE "\tIMPORT $_\n"),
> +        grep ! exists $labels_seen{$_}, (keys %call_targets, keys 
> %mov32_targets);
>  
> -map print(ASMFILE ".thumb_func $_\n"),
> -    grep exists $thumb_labels{$_}, keys %call_targets;
> +    print ASMFILE "\tEND\n";
> +}
>  
>  close(ASMFILE) or exit 1;
> +if ($as_type eq "armasm" and ! defined $ENV{GASPP_DEBUG}) {
> +    system(@gcc_cmd) == 0 or die "Error running assembler";
> +}
> +
> +END {
> +    unlink($tempfile) if defined $tempfile;
> +}
>  #exit 1
> -- 
> 1.8.1.2
> 
> _______________________________________________
> libav-devel mailing list
> [email protected]
> https://lists.libav.org/mailman/listinfo/libav-devel
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to