Sorry Folks,
I just noticed a bug in the version of mapnames.pl I just posted. Seems,
that some hosts were excluded from the generation of the final ht://Dig
list. This version should fix this bug.
Sorry for the inconvenience. :-(
-Walter
#!/usr/local/bin/perl5 -Tw
# DISCLAIMER:
#
# THIS IS ONE OF MY FIRST ATTEMPTS TO PROGRAM IN PERL. THERE MAY BE DREADFUL
# THINGS GOING ON DOWN THERE. HOWEVER; THE SCRIPT WORKS FOR ME. USE IT AT
# YOUR OWN RISK. :-))
# mapnames.pl, 19.2.99, Walter Hafner ([EMAIL PROTECTED])
#
# You may modify and distribute this script as you like. But please give
# me feedback on bugfixes and enhancements! Thanks!
# You'll need the Perl modules MD5 and libwww_perl to run mapnames.pl
require LWP::UserAgent;
use MD5;
# Purpose of mapnames.pl:
#
# Feed hostnames into it (stdin, one per line), that answer on port 80
# and get informations on
#
# - Bad hostname lookups
# - Bad HTTP responses
# - Virtual hosts
# - Virtual host aliases
# - Server aliases
#
# AND
#
# - all information that is needed for the "server_aliases:"
# option of the ht://Dig configuration. All you have to do is
# Cut'n'Paste it into the config file.
# Typical usage:
#
# 1) build a list of all the Servers visited by ht://Dig. You can get it
# by either specify "create_url_list: yes" in the config and extract
# all hostnames after a run or run "htdig" with the -v option and
# extract the hostnames from the log. The hostnames must be given one
# per line, without any "http://" or ":80" parts.
#
# 2) run "cat hostnames | mapnames.pl > mapnames.log"
#
# the "mapnames.log" should look like:
#
# Bad Hostname Lookups:
# ...
# Virtual Hosts:
# ...
# Virtual Host Aliases:
# ...
# Server Aliases:
# ...
# Final Mappings for ht://Dig:
# ...
#
# 3) Have a look at the "Virtual Host Aliases" and "Server Aliases"
# sections. Are the names ok for you? If not, create a file, that
# gives the correct mappings between the normalized name (the name
# to the right of the "-->" and the name you want this host to
# show in the ht://Dig index. The file has the form:
#
# "normalized name" <Whitespace> "name in ht://Dig index"
# "normalized name" <Whitespace> "name in ht://Dig index"
# ...
#
# Be sure NOT to use server aliases on the left side of the mappings!
# Then set $MAPFILE below accordingly.
#
# 4) run "cat hostnames | mapnames.pl > mapnames.log" again,
# take the output below "Final Mappings for ht://Dig:" and put
# it into the ht://Dig configuration.
#
# 5) start ht://Dig ...
# CAVEATS AND TODOS:
#
# - Limited to Port 80 (hardcoded, sorry)
#
# - VERY NAIVE HTTP requests, one at a time. When mapping a large number
# of hosts, the script may run a LONG TIME. There would be a great
# speedup, if parallel HTTP requests were used.
#
# - The $MAPFILE may be out of date after some time. If a server moves
# from one host to a different one, you'll still have the old (FALSE)
# mappings. Only solution: delete $MAPFILE from time to time and
# run steps 1-4 above again. Painful and awkward, I know.
# TODO: Script to check the mappings in the $MAPFILE file.
# How it works:
#
# - look up all hostnames from stdin and build a hash consisting of
# all the DNS aliases <-> normalized name mappings.
# - get the HTML root-page for all the aliases and normalized names.
# Then compute the MD5 checksums for all pages
# - If the MD5 checksums between a DNS alias and the normalized name
# differ, it's a virtual host. Otherwise it's a server alias.
# - Get possible aliases among all virtual hosts, again by MD5
# comparison
# - do the final mapping, according to $MAPFILE. All the server
# aliases and virtual hosts aliases are remapped once again.
# - print the whole bunch of data
# GLOBAL VARIABLES:
#
# %hostlist Mapping DNS-Alias --> Normalized Name
# %badhosts Hosts, for which no info could be obtained
#
# %md5list Mapping DNS-Alias --> MD5 Hash of HTML Page
# %badhtmllist Hosts, that had a bad HTML response
#
# %aliaslist Mapping HTTP-Alias --> HTTP Server
# %virtlist Mapping HTTP-Virtual_Host --> HTTP Server
#
# %virtaliases Mapping between virt. Hosts on the same server
# i.e. _one_ virt. host with _several_ aliases
#
# %finalmappings Mapping between norm.name and name to show in ht://Dig
# File to hold the mappings betwwen normalized name and ht://Dig name
#
$MAPFILE = "Mappings";
###############################################################################
# huild %hostlist by adding single hostnames
# Call: addhash_host(<HOSTNAME>)
sub addhash_host {
local ($norm_name, $aliases);
local $al;
if (! $hostlist{$_[0]}) {
($norm_name, $aliases, undef, undef, undef) =
gethostbyname($_[0]) ; # get normalized name and all aliases
if (! $norm_name) { # No info returned
$badhosts{$_[0]} = 1;
} else {
$hostlist{$norm_name} = $norm_name; # We need that for MD5
foreach $al (split / /, $aliases) {
$hostlist{$al} = $norm_name; # add all aliases to %hostlist
}
}
print STDERR ".";
}
}
###############################################################################
# huild %md5list from existing %hostlist
# Call: addhash_host()
sub buildhashes_md5 {
local $ua = LWP::UserAgent->new;
local ($key, $url, $request, $response);
print STDERR "building MD5 hashvalues ";
# do HTTP requests for all hosts in %hostlist
while (($key, undef) = each %hostlist) {
if ((! $md5list{$key}) && (! $badhosts{$key})) {
$url = "http://" . $key . "/";
$request = HTTP::Request->new('GET', $url);
$response = $ua->simple_request($request); # simple: don't want redirects
if ($response->is_success) { # ok, page returned; do MD5 hash
$md5list{$key} = MD5->hexhash($response->content);
} else { # no info returned from HTTP request
$badhtmllist{$key} = 1;
}
undef $request;
}
print STDERR ".";
}
print STDERR "\n";
}
###############################################################################
# separate HTTP-Aliases from Virtual Hosts
# (build %aliaslist and %virtlist)
# Call: buildvirthosts()
sub buildvirthosts {
local $key;
print STDERR "separating aliases from virtual hosts ";
foreach $key (keys %md5list) {
if (! $md5list{$hostlist{$key}}) { # Must be a better solution!!
die "No info on $hostlist{$key} -- CRITICAL ERROR\n";
} elsif ($md5list{$key} eq $md5list{$hostlist{$key}}) {
$aliaslist{$key} = $hostlist{$key} if ($key ne $hostlist{$key});
} else {
$virtlist{$key} = $hostlist{$key};
}
print STDERR ".";
}
print STDERR "\n";
}
###############################################################################
# eliminite aliases among virtual hosts
# Call: buildvirtaliases()
sub buildvirtaliases {
local ($vkey, $vval);
local ($vckey, $vcval);
local ($hkey, $hval);
local %virtcount;
local (%virtmd5, %virtmd5count);
local $firstname;
print STDERR "getting aliases among virtual hosts ";
foreach $vval (values %virtlist) { # How many virtual hosts on norm.names?
$virtcount{$vval}++; # normal. name --> number of virt. hosts
}
# if more than one; test for aliases among them
while (($vckey, $vcval) = each %virtcount) {
if ($vcval > 1) { # get all virt. hosts of norm.name $vckey
while (($vkey, $vval) = each %virtlist) { # all MD5s for virt hosts
$virtmd5{$vkey} = $md5list{$vkey} if ($vval eq $vckey);
}
foreach $vval (values %virtmd5) { # double MD5s ?
$virtmd5count{$vval}++;
}
# Get all hosts with MD5 hash $hkey
while (($vkey, $vval) = each %virtmd5count) {
if ($vval > 1) { # if $hkey was found more than once
$firstname = 0;
while (($hkey, $hval) = each %md5list) {
if ($hval eq $vkey) {
if ($firstname == 0) { # found for the first time?
$primealias = $hkey;
$firstname = 1;
} else {
$virtaliases{$hkey} = $primealias;
delete $virtlist{$hkey};
}
}
}
}
}
undef %virtmd5;
undef %virtmd5count;
}
print STDERR ".";
}
print STDERR "\n";
}
###############################################################################
# build %finalmappings, the final list for ht://Dig
# Call: buildfinalmappings($MAPFILE)
sub buildfinalmappings {
local %maplist;
local ($a, $b);
local ($key, $val);
print STDERR "assigning final mappings ";
open MAPLIST, $_[0] or return;
while (<MAPLIST>) {
chomp($_);
($a, $b) = split ;
$maplist{$a} = $b;
}
close MAPLIST;
while (($key, $val) = each %virtaliases) {
if ($maplist{$val}) {
$finalmappings{$key} = $maplist{$val} if ($key ne $maplist{$val});
$finalmappings{$val} = $maplist{$val} if ($val ne $maplist{$val});
} else {
$finalmappings{$key} = $val;
}
print STDERR ".";
}
while (($key, $val) = each %aliaslist) {
if ($maplist{$val}) {
$finalmappings{$key} = $maplist{$val} if ($key ne $maplist{$val});
$finalmappings{$val} = $maplist{$val} if ($val ne $maplist{$val});
} else {
$finalmappings{$key} = $val;
}
print STDERR ".";
}
print STDERR "\n";
}
###############################################################################
# convenience function
# Call: printallinfo()
sub printallinfo {
local $key;
print "Bad Hostname Lookups:\n";
foreach $key (sort keys %badhosts) {
print "\t$key\n";
}
print "\nBad HTTP Responses:\n";
foreach $key (sort keys %badhtmllist) {
print "\t$key\n";
}
print "\nVirtual Hosts:\n";
foreach $key (sort keys %virtlist) {
print "\t$key --> $virtlist{$key}\n";
}
print "\nVirtual Host Aliases:\n";
foreach $key (sort keys %virtaliases) {
print "\t$key --> $virtaliases{$key}\n";
}
print "\nServer Aliases:\n";
foreach $key (sort keys %aliaslist) {
print "\t$key --> $aliaslist{$key}\n";
}
print "\nFinal Mappings for ht://Dig:\n";
foreach $key (sort keys %finalmappings) {
print "\t", $key, ":80=", $finalmappings{$key}, ":80 \\\n";
}
}
###############################################################################
###############################################################################
###############################################################################
# MAIN
# Call: cat <HOSTLIST> | <PRGNAME>
print STDERR "building host/alias-list ";
while (<>) {
chomp($_);
addhash_host($_);
}
print STDERR "\n";
buildhashes_md5();
buildvirthosts();
buildvirtaliases();
buildfinalmappings($MAPFILE);
printallinfo();