#!/usr/bin/perl -w
#
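# Split a squid log into multiple log files according to a domain mapping
# config file. Each config line names an output file followed by the
# domains whose requests are written to it, for example:
#   a.log  example1.com example2.com
#   b.log  example3.com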
#
# $Id: split_squid_log.pl,v 1.1 2003/05/31 16:40:57 chedong Exp $
# Author Che, Dong <chedong@bigfoot.com>

use strict;
use Getopt::Std;
use IO::File;

# Read the command line switches: -h (help), -c <config>, -i <input log>.
my %opts = ();
getopts( 'hi:c:', \%opts );

# Show the usage text and stop when help was requested or when no switch
# at all was recognised.
if ( $opts{'h'} || !%opts ) {
    my @usage_lines = (
        "usage: $0 [options]",
        "    -h: print this help",
        "    -c: [config_file]",
        "    -i: [input squid log file]",
        "    example: $0 -c sync.conf -i access.log",
    );
    print map { "$_\n" } @usage_lines;
    exit;
}

# Script-wide state shared by the sections below.
my $input_file;                  # IO::File read handle on the squid log (-i)
my $conf_file_name      = "";    # path of the mapping config file (-c)
my $line                = "";    # scratch: line currently being read
my %domain_file_mapping = ();    # domain => output file name
my @domains             = ();    # every domain named in the config file
my %files               = ();    # output file name => IO::File write handle
my $domain              = '';    # scratch: domain loop variable

# Both -c and -i are mandatory.  A missing argument is reported on STDERR
# and ends the script with a non-zero status (via die) so that calling
# scripts can detect the failure.  Testing defined/empty rather than plain
# truthiness keeps a file literally named "0" usable.
if ( !defined $opts{'c'} || $opts{'c'} eq "" ) {
    die "please specify sync site configure file\n";
}
$conf_file_name = $opts{'c'};

if ( !defined $opts{'i'} || $opts{'i'} eq "" ) {
    die "please specify input file\n";
}

# Open the squid log for reading; the handle is consumed by the split loop.
$input_file = IO::File->new( $opts{'i'}, "r" )
  or die "Unable to open logfile $opts{'i'}: $!\n";

# parse domain list from configure file
#
# Every line that is neither blank nor a comment (first non-blank character
# is '#') has the form
#   <output file> <domain> [<domain> ...]
# For each such line the output file is opened for writing and every listed
# domain is mapped to that file name.

my $conf_file = IO::File->new( $conf_file_name, "r" )
  or die "Can't open $conf_file_name: $!";
while ( defined( my $conf_line = $conf_file->getline() ) ) {
    chomp $conf_line;

    # trim
    $conf_line =~ s/^\s+//;
    $conf_line =~ s/\s+$//;

    # skip blank lines and comments
    next if $conf_line eq '' or $conf_line =~ m/^#/;

    my ( $filename, @domain_list ) = split( ' ', $conf_line );

    # init file handler; a file name that appears on several config lines
    # keeps its first handle instead of being reopened (and truncated) again
    $files{$filename} ||= IO::File->new( $filename, "w" )
      or die "Couldn't open $filename for writing: $!\n";

    for my $name (@domain_list) {

        # record each domain once so the domain count stays accurate;
        # when a domain is listed twice the later mapping wins
        push( @domains, $name ) unless exists $domain_file_mapping{$name};
        $domain_file_mapping{$name} = $filename;
    }
}
$conf_file->close();

# make an unknown.log for unknown mapping host
# (if the config file already opened "unknown.log" as an output file, that
# handle is kept rather than being replaced by a second one)
$files{"unknown.log"} ||= IO::File->new( "unknown.log", "w" )
  or die "Couldn't open unknown.log for writing: $!\n";

# Print a summary of what is about to happen.  File names and mappings are
# sorted so the listing is the same on every run.
my $domain_number = scalar(@domains);
my @file_list     = sort keys %files;
my $output_number = scalar(@file_list);
print "I have $domain_number domain(s) to split: @domains\n";
print "will output into $output_number file(s): @file_list\n";
print "according to following mapping:\n";
for my $mapped_domain ( sort keys %domain_file_mapping ) {
    print "$mapped_domain => $domain_file_mapping{$mapped_domain}\n";
}

# split log according to the domain => output_file mapping
#
# For every log line the request URL is reduced to its path and the host
# part is compared with the configured domains, in @domains order.  The
# first domain found anywhere inside the host selects the output file, which
# receives the rewritten line.  Lines with no recognisable request URL, or
# whose host matches no domain, are written unchanged to unknown.log.

# Compile one pattern per domain up front.  \Q...\E makes the domain match
# literally, so the '.' in "example.com" no longer matches any character.
my @domain_matchers =
  map { [ qr/\Q$_\E/, $domain_file_mapping{$_} ] } @domains;

while ( defined( my $log_line = $input_file->getline() ) ) {

    # convert "GET http://localhost:8000/index.html => "GET /index.html
    # The host class includes '-' so hyphenated host names are captured
    # whole instead of being cut at the first hyphen.
    my $rewritten = $log_line;
    my $host;
    if ( $rewritten =~ s{"(GET|POST|HEAD) \w+://([\w.-]+)(?::\d+)?}{"$1 } ) {

        # only trust $2 when the substitution actually matched
        $host = $2;
    }

    # find the output file of the first configured domain found in the host
    my $target_file;
    if ( defined $host ) {
        for my $matcher (@domain_matchers) {
            my ( $pattern, $file_name ) = @{$matcher};
            if ( $host =~ $pattern ) {
                $target_file = $file_name;
                last;
            }
        }
    }

    if ( defined $target_file ) {
        $files{$target_file}->print($rewritten);
    }
    else {
        $files{"unknown.log"}->print($log_line);
    }
}

$input_file->close();

# close file_list
# Buffered write errors (for example a full disk) only surface when a handle
# is closed, so every close is checked.  All handles are closed before the
# failures are reported, and the script then ends with a non-zero status.
my @close_failures;
for my $file_name ( sort keys %files ) {
    $files{$file_name}->close
      or push @close_failures, "$file_name: $!";
}
die "Failed to close output file(s): " . join( "; ", @close_failures ) . "\n"
  if @close_failures;

