
package ExtUtils::XSBuilder::ParseSource;

use strict;
use Config ();

our $VERSION = '0.03';


# ============================================================================

sub new {
    my $class = shift;

    my $self = bless {
        @_,
    }, $class;

    my $prefixes = join '|', @{ $self->{prefixes} || $self -> prefixlist };
    $self->{prefix_re} = qr{^($prefixes)};

    $self;
}

# ============================================================================

sub parse {
    my $self = shift;

    $self->{scan_filename} = $self->generate_cscan_file;

    $self->{c} = $self->scan;
}

# ============================================================================

sub DESTROY {
    my $self = shift;
    unlink $self->{scan_filename}
}

# ============================================================================

{
    package ExtUtils::XSBuilder::Scan;

    our @ISA = qw(C::Scan);

    sub get {
        local $SIG{__DIE__} = \&Carp::confess;
        shift->SUPER::get(@_);
    }
}

# ============================================================================

sub scan {
    require C::Scan;
    C::Scan->VERSION(0.75);
    require Carp;

    my $self = shift;

    my $c = C::Scan->new(filename => $self->{scan_filename});

    $c->set(includeDirs => $self->includes);
    $c->set(Defines => '-DCORE_PRIVATE');

    bless $c, 'ExtUtils::XSBuilder::Scan';
}

# ============================================================================
=pod

=head2 prefixlist (o)

returns a reference to a list of prefixes. Every Symbol that doesn't start 
with one of these prefixes is discarded

=cut
# ---------------


sub prefixlist {
    return [qw(ap_ apr_)] ;
}



# ============================================================================
=pod

=head2 include_dirs (o)

returns a reference to a list of include file directories.

=cut
# ---------------

sub include_dirs {
    my $self = shift;
    ($self->config->apxs(-q => 'INCLUDEDIR'),
     $self->config->mp_include_dir);
}



# ============================================================================
=pod

=head2 includes (o)

returns a reference to a list of include files ???? unused ????.

=cut
# ---------------

sub includes { [] }



# ============================================================================
=pod

=head2 unwanted_includes (o)

returns a reference to a list of include files that should not be processed.

=cut
# ---------------

sub unwanted_includes { [] }



# ============================================================================
=pod

=head2 sort_includes (o)

gets a array ref of include files, should returned an ordered one, so includes
are processed in the right order.

=cut
# ---------------

sub sort_includes {
    return shift ;
}



# ============================================================================
=pod

=head2 find_includes (o)

search directories given by C<include_dirs> for all files and build list
of include files. All files starting with a word returned by C<unwanted_includes>
are not included in the list.

=cut
# ---------------

sub find_includes {
    my $self = shift;

    return $self->{includes} if $self->{includes};

    require File::Find;

    my(@dirs) = $self->include_dirs;

    unless (-d $dirs[0]) {
        die "could not find include directory";
    }

    my @includes;
    my $unwanted = join '|', $self -> unwanted_includes ;

    for my $dir (@dirs) {
        File::Find::finddepth({
                               wanted => sub {
                                   return unless /\.h$/;
                                   return if /^($unwanted)/o;
                                   my $dir = $File::Find::dir;
                                   push @includes, "$dir/$_";
                               },
                               follow => 1,
                              }, $dir);
    }

    return $self->{includes} = $self -> sort_includes (\@includes) ;
}

# ============================================================================
=pod

=head2 cscan_filename (o)

return filename for temporary file that holds all #includes

=cut
# ---------------


sub cscan_filename { '.includes' } ;


# ============================================================================


sub generate_cscan_file {
    my $self = shift;

    my $includes = $self->find_includes;

    my $filename = $self->cscan_filename;

    open my $fh, '>', $filename or die "can't open $filename: $!";
    for (@$includes) {
        print $fh qq(\#include "$_"\n);
    }
    close $fh;

    return $filename;
}


# ============================================================================
=pod

=head2 defines_wanted (o)

returns a reference to a hash of defines that should be included

=cut
# ---------------

sub defines_wanted { {} } ;


# ============================================================================
=pod

=head2 defines_unwanted (o)

returns a reference to a hash of defines that should be excluded

=cut
# ---------------


sub defines_unwanted { undef } ;


# ============================================================================
=pod

=head2 enums_wanted (o)

returns a reference to a hash of enums that should be included

=cut
# ---------------

sub enums_wanted { {} } ;

# ============================================================================
=pod

=head2 enums_unwanted (o)

returns a reference to a hash of enums that should be excluded

=cut
# ---------------

sub enums_unwanted { undef } ;


# ============================================================================

sub defines_wanted_re {
    $self = shift ;

    return $self -> {defines_wanted_re} if ($self -> {defines_wanted_re}) ;

    my $defines_wanted = $self -> defines_wanted ;
    my $defines_wanted_re = $self -> {defines_wanted_re} = {} ;

    while (my($class, $groups) = each %$defines_wanted) {
        while (my($group, $wanted) = each %$groups) {
            my $pat = join '|', @$wanted;
            $defines_wanted_re -> {$class}->{$group} = $pat; #qr{^($pat)};
        }
    }
}


# ============================================================================


sub get_constants {
    my($self) = @_;

    my $includes = $self->find_includes;
    my(%constants, %seen);
    my $defines_wanted_re   = $self -> defines_wanted_re ;
    my $defines_wanted      = $self -> defines_wanted ;
    my $defines_unwanted    = $self -> defines_unwanted ;
    my $enums_wanted        = $self -> defines_enums ;
    my $enums_unwanted      = $self -> defines_enums ;

    for my $file (@$includes) {
        open my $fh, $file or die "open $file: $!";
        while (<$fh>) {
            if (s/^\#define\s+(\w+)\s+.*/$1/) {
                chomp;
                next if /_H$/;
                next if $seen{$_}++;
                $self->handle_constant(\%constants, $defines_wanted_re, $defines_wanted, $defines_unwanted);
            }
            elsif (m/enum[^\{]+\{/) {
                $self->handle_enum($fh, \%constants, $enums_wanted, $enums_unwanted);
            }
        }
        close $fh;
    }

    return \%constants;
}

# ============================================================================


sub handle_constant {
    my($self, $constants, $defines_wanted_re, $defines_wanted, $defines_unwanted) = @_;
    my $keys = keys %$defines_wanted_re; #XXX broken bleedperl ?

    return if ($defines_unwanted && (/^($defines_unwanted)/o)) ;

    while (my($class, $groups) = each %$defines_wanted_re) {
        my $keys = keys %$groups; #XXX broken bleedperl ?

        while (my($group, $re) = each %$groups) {
            next unless /^($re)/;
            push @{ $constants->{$class}->{$group} }, $_;
            return;
        }
    }
}

# ============================================================================

sub handle_enum {
    my($self, $fh, $constants, $enums_wanted, $enums_unwanted) = @_;

    my($name, $e) = $self->parse_enum($fh);
    return unless $name;

    $name = $self -> handle_enum_name ($name) ;

    my $class;
    for (keys %$enums_wanted) {
        next unless $enums_wanted->{$_}->{$name};
        $class = $_;
    }

    return unless $class;

    push @{ $constants->{$class}->{$name} }, @$e if $e;
}

# ============================================================================
=pod

=head2 handle_enum_name (o)

can be used to modify the name of an enum (strip prefix, postfix etc)

=cut
# ---------------

sub handle_enum_name {
    my($self, $name) = @_;

    return $name ;
}


# ============================================================================

#this should win an award for worlds lamest parser
sub parse_enum {
    my($self, $fh) = @_;
    my $code = $_;
    my @e;

    unless ($code =~ /;\s*$/) {
        local $_;
        while (<$fh>) {
            $code .= $_;
            last if /;\s*$/;
        }
    }

    my $name;
    if ($code =~ s/^\s*enum\s+(\w*)\s*//) {
        $name = $1;
    }
    elsif ($code =~ s/^\s*typedef\s+enum\s+//) {
        $code =~ s/\s*(\w+)\s*;\s*$//;
        $name = $1;
    }
    $code =~ s:/\*.*?\*/::sg;
    $code =~ s/\s*=\s*\w+//g;
    $code =~ s/^[^\{]*\{//s;
    $code =~ s/\}[^;]*;?//s;

    while ($code =~ /\b(\w+)\b,?/g) {
        push @e, $1;
    }

    return ($name, \@e);
}

# ============================================================================
=pod

=head2 wanted_functions (o)

return regex for functions that should be included

=cut
# ---------------

sub wanted_functions  { shift->{prefix_re} }


# ============================================================================
=pod

=head2 wanted_structures (o)

return regex for structures that should be included

=cut
# ---------------

sub wanted_structures { shift->{prefix_re} }


# ============================================================================

sub get_functions {
    my $self = shift;

    my $key = 'parsed_fdecls';
    return $self->{$key} if $self->{$key};

    my $c = $self->{c};

    my $fdecls = $c->get($key);

    my %seen;
    my $wanted = $self->wanted_functions;

    my @functions;

    for my $entry (@$fdecls) {
        my($rtype, $name, $args) = @$entry;
        next unless $name =~ $wanted;
        next if $seen{$name}++;

        for (qw(static __inline__)) {
            $rtype =~ s/^$_\s+//;
        }

        #XXX: working around C::Scan confusion here
        #macro defines ap_run_error_log causes
        #cpp filename:linenumber to be included as part of the type
        for (@$args) {
            next unless $_->[0];
            $_->[0] =~ s/^\#.*?\"\s+//;
        }

        my $func = {
           name => $name,
           return_type => $rtype,
           args => [map {
               { type => $_->[0], name => $_->[1] }
           } @$args],
        };

        push @functions, $func;
    }

    # sort the functions by the 'name' attribute to ensure a
    # consistent output on different systems.
    $self->{$key} = [sort { $a->{name} cmp $b->{name} } @functions];
}

# ============================================================================

sub get_structs {
    my $self = shift;

    my $key = 'typedef_structs';
    return $self->{$key} if $self->{$key};

    my $c = $self->{c};

    my $typedef_structs = $c->get($key);

    my %seen;
    my $wanted = $self->wanted_structures;

    my @structures;
    my $sx = qr(^struct\s+);

    while (my($type, $elts) = each %$typedef_structs) {
        next unless $type =~ $wanted ;

        $type =~ s/$sx//;

        next if $seen{$type}++;

        my $struct = {
           type => $type,
           elts => [map {
               my $type = $_->[0];
               $type =~ s/$sx//;
               $type .= $_->[1] if $_->[1];
               $type =~ s/:\d+$//; #unsigned:1
               { type => $type, name => $_->[2] }
           } @$elts],
        };

        push @structures, $struct;
    }

    # sort the structs by the 'type' attribute to ensure a consistent
    # output on different systems.
    $self->{$key} = [sort { $a->{type} cmp $b->{type} } @structures];
}

# ============================================================================
=pod

=head2 package (o)

return package name for tables

=cut
# ---------------

sub package { 'MY' }

# ============================================================================
=pod

=head2 targetdir (o)

return name of target directory where to write tables

=cut
# ---------------

sub targetdir { 'tables' }



# ============================================================================

sub write_functions_pm {
    my $self = shift;
    my $file = shift || 'FunctionTable.pm';
    my $name = shift || $self -> package . '::FunctionTable';

    $self->write_pm($file, $name, $self->get_functions);
}

# ============================================================================

sub write_structs_pm {
    my $self = shift;
    my $file = shift || 'StructureTable.pm';
    my $name = shift || $self -> package . '::StructureTable';

    $self->write_pm($file, $name, $self->get_structs);
}

# ============================================================================

sub write_constants_pm {
    my $self = shift;
    my $file = shift || 'ConstantsTable.pm';
    my $name = shift || $self -> package . '::ConstantsTable';

    $self->write_pm($file, $name, $self->get_constants);
}

# ============================================================================

sub write_pm {
    my($self, $file, $name, $data) = @_;

    require Data::Dumper;
    local $Data::Dumper::Indent = 1;

    my($subdir) = (split '::', $name)[0];

    my $tdir = $self -> targetdir ;
    if (-d "$tdir/$subdir") {
        $file = "$tdir/$subdir/$file";
    }

    # sort the hashes (including nested ones) for a consistent dump
    canonsort(\$data);

    my $dump = Data::Dumper->new([$data],
                                 [$name])->Dump;

    my $package = ref($self) || $self;
    my $version = $self->VERSION;
    my $date = scalar localtime;

    my $new_content = << "EOF";
package $name;

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# ! WARNING: generated by $package/$version
# !          $date
# !          do NOT edit, any changes will be lost !
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

$dump

1;
EOF

    my $old_content = '';
    if (-e $file) {
        open my $pm, '<', $file or die "open $file: $!";
        local $/ = undef; # slurp the file
        $old_content = <$pm>;
        close $pm;
    }

    my $overwrite = 1;
    if ($old_content) {
        # strip the date line, which will never be the same before
        # comparing
        my $table_header = qr{^\#\s!.*};
        (my $old = $old_content) =~ s/$table_header//mg;
        (my $new = $new_content) =~ s/$table_header//mg;
        $overwrite = 0 if $old eq $new;
    }

    if ($overwrite) {
        open my $pm, '>', $file or die "open $file: $!";
        print $pm $new_content;
        close $pm;
    }

}

# ============================================================================
#
# canonsort(\$data);
# sort nested hashes in the data structure.
# the data structure itself gets modified
#

sub canonsort {
    my $ref = shift;
    my $type = ref $$ref;

    return unless $type;

    require Tie::IxHash;

    my $data = $$ref;

    if ($type eq 'ARRAY') {
        for (@$data) {
            canonsort(\$_);
        }
    }
    elsif ($type eq 'HASH') {
        for (keys %$data) {
            canonsort(\$data->{$_});
        }

        tie my %ixhash, 'Tie::IxHash';

        # reverse sort so we get the order of:
        # return_type, name, args { type, name } for functions
        # type, elts { type, name } for structures

        for (sort { $b cmp $a } keys %$data) {
            $ixhash{$_} = $data->{$_};
        }

        $$ref = \%ixhash;
    }
}

1;
__END__
