Sorry Folks,

I just noticed a bug in the version of mapnames.pl I just posted. Seems, 
that some hosts were excluded from the generation of the final ht://Dig
list. This version should fix this bug.

Sorry for the inconvenience. :-(

-Walter

#!/usr/local/bin/perl5 -Tw


# DISCLAIMER:
#
# THIS IS ONE OF MY FIRST ATTEMPTS TO PROGRAM IN PERL. THERE MAY BE DREADFUL
# THINGS GOING ON DOWN THERE. HOWEVER; THE SCRIPT WORKS FOR ME. USE IT AT 
# YOUR OWN RISK. :-))


# mapnames.pl, 19.2.99, Walter Hafner ([EMAIL PROTECTED])
#
# You may modify and distribute this script as you like. But please give
# me feedback on bugfixes and enhancements! Thanks!

# You'll need the Perl modules MD5 and libwww_perl to run mapnames.pl


require LWP::UserAgent;
use MD5;


# Purpose of mapnames.pl:
#
# Feed hostnames into it (stdin, one per line), that answer on port 80
# and get informations on
#
# - Bad hostname lookups
# - Bad HTTP responses
# - Virtual hosts
# - Virtual host aliases
# - Server aliases
# 
# AND
# 
# - all information that is needed for the "server_aliases:"
#   option of the ht://Dig configuration. All you have to do is
#   Cut'n'Paste it into the config file.


# Typical usage:
#
# 1) build a list of all the Servers visited by ht://Dig. You can get it
#    by either specify "create_url_list: yes" in the config and extract
#    all hostnames after a run or run "htdig" with the -v option and
#    extract the hostnames from the log. The hostnames must be given one
#    per line, without any "http://" or ":80" parts.
#
# 2) run "cat hostnames | mapnames.pl > mapnames.log"
#
#    the "mapnames.log" should look like:
#
#    Bad Hostname Lookups:
#            ...
#    Virtual Hosts:
#            ...
#    Virtual Host Aliases:
#            ...
#    Server Aliases:
#            ...
#    Final Mappings for ht://Dig:
#            ...
#
# 3) Have a look at the "Virtual Host Aliases" and "Server Aliases"
#    sections. Are the names ok for you? If not, create a file, that
#    gives the correct mappings between the normalized name (the name
#    to the right of the "-->" and the name you want this host to
#    show in the ht://Dig index. The file has the form:
#
#    "normalized name" <Whitespace> "name in ht://Dig index"
#    "normalized name" <Whitespace> "name in ht://Dig index"
#    ...
#
#    Be sure NOT to use server aliases on the left side of the mappings!
#    Then set $MAPFILE below accordingly.
#
# 4) run "cat hostnames | mapnames.pl > mapnames.log" again,
#    take the output below "Final Mappings for ht://Dig:" and put
#    it into the ht://Dig configuration.
#
# 5) start ht://Dig ...


# CAVEATS AND TODOS:
#
# - Limited to Port 80 (hardcoded, sorry)
#
# - VERY NAIVE HTTP requests, one at a time. When mapping a large number
#   of hosts, the script may run a LONG TIME. There would be a great
#   speedup, if parallel HTTP requests were used.
#
# - The $MAPFILE may be out of date after some time. If a server moves
#   from one host to a different one, you'll still have the old (FALSE)
#   mappings. Only solution: delete $MAPFILE from time to time and
#   run steps 1-4 above again. Painful and awkward, I know.
#   TODO: Script to check the mappings in the $MAPFILE file.


# How it works:
#
# - look up all hostnames from stdin and build a hash consisting of
#   all the DNS aliases <-> normalized name mappings.
# - get the HTML root-page for all the aliases and normalized names.
#   Then compute the MD5 checksums for all pages
# - If the MD5 checksums between a DNS alias and the normalized name
#   differ, it's a virtual host. Otherwise it's a server alias.
# - Get possible aliases among all virtual hosts, again by MD5
#   comparison
# - do the final mapping, according to $MAPFILE. All the server
#   aliases and virtual hosts aliases are remapped once again.
# - print the whole bunch of data


# GLOBAL VARIABLES:
#
# %hostlist        Mapping DNS-Alias --> Normalized Name
# %badhosts        Hosts, for which no info could be obtained
#
# %md5list         Mapping DNS-Alias --> MD5 Hash of HTML Page
# %badhtmllist     Hosts, that had a bad HTML response
#
# %aliaslist       Mapping HTTP-Alias --> HTTP Server
# %virtlist        Mapping HTTP-Virtual_Host --> HTTP Server
#
# %virtaliases     Mapping between virt. Hosts on the same server
#                  i.e. _one_ virt. host with _several_ aliases
#
# %finalmappings   Mapping between norm.name and name to show in ht://Dig


# File to hold the mappings betwwen normalized name and ht://Dig name
#
$MAPFILE = "Mappings";


###############################################################################
# huild %hostlist by adding single hostnames
# Call: addhash_host(<HOSTNAME>)

sub addhash_host {
  local ($norm_name, $aliases);
  local $al;

  if (! $hostlist{$_[0]}) {
    ($norm_name, $aliases, undef, undef, undef) =
      gethostbyname($_[0]) ;    # get normalized name and all aliases

    if (! $norm_name) { # No info returned
      $badhosts{$_[0]} = 1;
    } else {
      $hostlist{$norm_name} = $norm_name; # We need that for MD5
      foreach $al (split / /, $aliases) {
        $hostlist{$al} = $norm_name; # add all aliases to %hostlist
      }
    }
    print STDERR ".";
  }
}


###############################################################################
# huild %md5list from existing %hostlist
# Call: addhash_host()

sub buildhashes_md5 {
  local $ua = LWP::UserAgent->new;
  local ($key, $url, $request, $response);

  print STDERR "building MD5 hashvalues ";

                                # do HTTP requests for all hosts in %hostlist
  while (($key, undef) = each %hostlist) {
    if ((! $md5list{$key}) && (! $badhosts{$key})) {
      $url = "http://" . $key . "/";
      $request = HTTP::Request->new('GET', $url);
      $response = $ua->simple_request($request); # simple: don't want redirects
      if ($response->is_success) { # ok, page returned; do MD5 hash
        $md5list{$key} = MD5->hexhash($response->content);
      } else {                  # no info returned from HTTP request
        $badhtmllist{$key} = 1;
      }
      undef $request;
    }
    print STDERR ".";
  }
  print STDERR "\n";
}


###############################################################################
# separate HTTP-Aliases from Virtual Hosts
# (build %aliaslist and %virtlist)
# Call: buildvirthosts()

sub buildvirthosts {
  local $key;

  print STDERR "separating aliases from virtual hosts ";

  foreach $key (keys %md5list) {
    if (! $md5list{$hostlist{$key}}) { # Must be a better solution!!
      die "No info on $hostlist{$key} -- CRITICAL ERROR\n";
    } elsif ($md5list{$key} eq $md5list{$hostlist{$key}}) {
      $aliaslist{$key} = $hostlist{$key} if ($key ne $hostlist{$key});
    } else {
      $virtlist{$key} = $hostlist{$key};
    }
    print STDERR ".";
  }
  print STDERR "\n";
}


###############################################################################
# eliminite aliases among virtual hosts
# Call: buildvirtaliases()

sub buildvirtaliases {
  local ($vkey, $vval);
  local ($vckey, $vcval);
  local ($hkey, $hval);
  local %virtcount;
  local (%virtmd5, %virtmd5count);
  local $firstname;

  print STDERR "getting aliases among virtual hosts ";

  foreach $vval (values %virtlist) { # How many virtual hosts on norm.names?
    $virtcount{$vval}++;        # normal. name --> number of virt. hosts
  }  
                                # if more than one; test for aliases among them
  while (($vckey, $vcval) = each %virtcount) {
    if ($vcval > 1) {           # get all virt. hosts of norm.name $vckey
      while (($vkey, $vval) = each %virtlist) { # all MD5s for virt hosts
        $virtmd5{$vkey} = $md5list{$vkey} if ($vval eq $vckey);
      }
      foreach $vval (values %virtmd5) { # double MD5s ?
        $virtmd5count{$vval}++;
      } 
                                # Get all hosts with MD5 hash $hkey
      while (($vkey, $vval) = each %virtmd5count) {
        if ($vval > 1) {        # if $hkey was found more than once
          $firstname = 0;
          while (($hkey, $hval) = each %md5list) {
            if ($hval eq $vkey) {
              if ($firstname == 0) { # found for the first time?
                $primealias = $hkey;
                $firstname = 1;
              } else {
                $virtaliases{$hkey} = $primealias;
                delete $virtlist{$hkey};
              }
            }
          }
        }
      }
      undef %virtmd5;
      undef %virtmd5count;
    }
    print STDERR ".";
  }
  print STDERR "\n";
}


###############################################################################
# build %finalmappings, the final list for ht://Dig
# Call: buildfinalmappings($MAPFILE)

sub buildfinalmappings {
  local %maplist;
  local ($a, $b);
  local ($key, $val);

  print STDERR "assigning final mappings ";

  open MAPLIST, $_[0] or return;
  while (<MAPLIST>) {
    chomp($_);
    ($a, $b) = split ;
    $maplist{$a} = $b;
  }
  close MAPLIST;

  while (($key, $val) = each %virtaliases) {
    if ($maplist{$val}) {
      $finalmappings{$key} = $maplist{$val} if ($key ne $maplist{$val});
      $finalmappings{$val} = $maplist{$val} if ($val ne $maplist{$val});
    } else {
      $finalmappings{$key} = $val;
    }
    print STDERR ".";
  }
  while (($key, $val) = each %aliaslist) {
    if ($maplist{$val}) {
      $finalmappings{$key} = $maplist{$val} if ($key ne $maplist{$val});
      $finalmappings{$val} = $maplist{$val} if ($val ne $maplist{$val});
    } else {
      $finalmappings{$key} = $val;
    }
    print STDERR ".";
  }
  print STDERR "\n";
}


###############################################################################
# convenience function
# Call: printallinfo()

sub printallinfo {
  local $key;

  print "Bad Hostname Lookups:\n";
  foreach $key (sort keys %badhosts) {
    print "\t$key\n";
  }
  print "\nBad HTTP Responses:\n";
  foreach $key (sort keys %badhtmllist) {
    print "\t$key\n";
  }
  print "\nVirtual Hosts:\n";
  foreach $key (sort keys %virtlist) {
    print "\t$key --> $virtlist{$key}\n";
  }
  print "\nVirtual Host Aliases:\n";
  foreach $key (sort keys %virtaliases) {
    print "\t$key --> $virtaliases{$key}\n";
  }
  print "\nServer Aliases:\n";
  foreach $key (sort keys %aliaslist) {
    print "\t$key --> $aliaslist{$key}\n";
  }
  print "\nFinal Mappings for ht://Dig:\n";
  foreach $key (sort keys %finalmappings) {
    print "\t", $key, ":80=", $finalmappings{$key}, ":80 \\\n";
  }
}


###############################################################################
###############################################################################
###############################################################################
# MAIN
# Call: cat <HOSTLIST> | <PRGNAME>

print STDERR "building host/alias-list ";
while (<>) {
  chomp($_);
  addhash_host($_);
}
print STDERR "\n";

buildhashes_md5();
buildvirthosts();
buildvirtaliases();
buildfinalmappings($MAPFILE);

printallinfo();

Reply via email to