#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;

# Copyright (C) 2005  Alistair McGlinchy
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
    
# Tunable defaults, all overridable on the command line (see $usage).
my $res          =       300;   # time-bucket resolution in seconds
my $offset       =         0;   # bucket phase offset, must be < $res
my $force_time   =     undef;   # when defined, every flow is stamped with this time
my $max_records  = 1_000_000;   # flush the dedup store every this-many input lines
my $usage        = <<END_MSG;
$0
  De-duplicates "fx" data seen by multiple routers into a single
  record. The "exporter" field contains the router with the
  largest value for this flow.

Usage:  $0 [-r <resolution>] [-o <offset>] [-f <time>] [-m <max_records> ] ft_file,...

        -r res     Two flows seen by different routers in the same
                   <res> seconds will be considered the same flow.
        -o offset  Flow time is rounded to int((time-offset)/res)*res+offset
        -f time    Forces all flows to be considered to be seen at this time
        -m max     Only check for duplicates within <max> records,
                   default to $max_records records. [Use if memory is an issue]

Note:  "fx" format is the
    #!/bin/sh
    /usr/local/netflow/bin/flow-export -f2 -m0x383069

END_MSG

GetOptions (
        "r=i" => \$res,
        "o=i" => \$offset,
        "f=i" => \$force_time,
        "m=i" => \$max_records,
) or die $usage;

# Sanity-check option values before any input is read.
die "ERROR: $0: Offset must be smaller than resolution.\n" unless $offset < $res;
die "ERROR: $0: Max records must be at least 100 or you'll never get any work done\n"
        unless $max_records > 100;

# The exact "fx" CSV header this script accepts; the input's first line
# must match it byte-for-byte or we refuse to proceed.
my $fxheader ="#:unix_secs,exaddr,dpkts,doctets,srcaddr,dstaddr,srcport,dstport,prot\n";
my $header=<>;
# Guard against empty input: without this, the eq below warns on undef
# and the die message prints a blank where the header should be.
defined $header
    or die "ERROR: $0: No input data - expected a Flow-Export header line.\n";
$header eq $fxheader or die <<ERR;
ERROR: $0 Expected to receive a Flow-Export Data file header like this
$fxheader
       but I got :
$header
ERR

# %store: rounded-time => flow-key => exporter => [dpkts, doctets, extra-columns]
my %store;
print $fxheader;

# Accumulate per-exporter packet/octet totals for each flow key,
# flushing the store every $max_records lines to bound memory use.
while(<>) {
    chomp;
    next if substr($_,0,1) eq "#";          # skip comment/header lines
    Export() if ($. % $max_records == 0);   # periodic flush caps memory
    my ($utime, $exaddr, $dpkts, $doctets,$srcaddr,$dstaddr,$srcport,$dstport,$prot,$rest)=
        split/,/,$_,10;
    # Round the timestamp down into its resolution bucket, unless a fixed
    # time was requested.  Test defined(), not truth, so "-f 0" (force to
    # the epoch) is honoured rather than silently ignored.
    my $roundtime= defined $force_time
        ? $force_time
        : int(($utime-$offset)/$res)*$res + $offset;

    # Flows agreeing on these five fields (within one bucket) are duplicates.
    my $dd_key= join ",", $srcaddr,$dstaddr,$srcport,$dstport,$prot;
    for ($store{$roundtime}{$dd_key}{$exaddr}) {   # autovivifies the entry
        $_->[0]+= $dpkts;
        $_->[1]+= $doctets;
        # Allow for nonstandard fx records with extra columns
        $_->[2] = $rest;    # It will be pot-luck as to which suffix is retained!
    }
}
Export();   # flush whatever remains after the last input line

# Emit one de-duplicated record per (time bucket, flow key), choosing as
# "exporter" the router that reported the largest octet count, then empty
# the store so accumulation can start afresh.
sub Export {
    for my $bucket (sort {$a<=>$b} keys %store) {
        for my $flow_key (keys %{$store{$bucket}}) {
            my $by_exporter = $store{$bucket}{$flow_key};

            # Scan the exporters, keeping the one with the most octets.
            my ($best_pkt, $best_oct, $suffix) = (0, 0, "");
            my $witness;    # the witness router to the largest flow
            for my $exporter (keys %$by_exporter) {
                my $counts = $by_exporter->{$exporter};
                next unless $best_oct <= $counts->[1];
                ($best_pkt, $best_oct, $suffix) = @$counts;
                $witness = $exporter;
            }

            my @fields = ($bucket, $witness, $best_pkt, $best_oct, $flow_key);
            # Frig to allow fx format CSV files with extra columns to work
            push @fields, $suffix if $suffix;
            print join(",", @fields), "\n";
        }
    }
    %store=();
}
