#!/usr/bin/perl
use strict;
use re 'taint';

  use DateTime;
  use IO::File;
  use Time::HiRes ();

  # Syslog timestamps carry no year, so this one is assumed for every record.
  my $year_default = 2011;

  # Reference year for day-number bins (day 1 is 1 January of this year).
  my $y0 = 2011;

  # 1 January of the reference year, and its Modified Julian Day number.
  my $d0 = DateTime->new(year => $y0);
  my $d0_mjd = $d0->mjd;

  # Lower-cased three-letter month abbreviation => month number (1..12).
  my %mo;
  @mo{qw(jan feb mar apr may jun jul aug sep oct nov dec)} = (1 .. 12);

  my $recno;                     # number of input lines read so far
  my %contin;                    # partial log entries, keyed "host/proc/pid"
  my $t0 = Time::HiRes::time();  # wall-clock time at start-up
  # Main loop.  Reads syslog lines from the files named on the command line
  # (or from STDIN), keeps only entries logged by 'amavis', reassembles
  # entries that were split over several syslog lines, and for every TIMING
  # report prints one line on STDOUT:
  #
  #   epoch size total_ms SMTP_greeting SMTP_DATA digest_body_dkim AV-scan-1 SA_check
  #
  # A timing section that is absent from an entry is printed as 0 so that
  # the column positions stay fixed.  Diagnostics go to STDERR; a line that
  # cannot be processed is reported and skipped, never fatal.
  while (<>) {
    $recno++; chomp; my($line) = $_;

    # Strip the syslog prefix "Mon DD HH:MM:SS host proc[pid]: " so that
    # only the message text remains in $_.
    if (! s/^(\S+) \s+ (\d+) \s+ (\d+):(\d+):(\d+) \s+
             (\S+) \s+ ([^[: ]+) (?: \[ (\d+) \] )?: \s*//x) {
      # "last message repeated N times" lines have no process tag and are
      # expected; anything else that fails to parse is reported.
      printf STDERR ("bad line %d: %s\n",
                     $recno,$line)  if !/\blast message repeated\b.*\btimes$/;
      next;
    }
    my($y,$m,$d,$hh,$mm,$ss,$host,$proc,$pid) =
      ($year_default,$1,$2,$3,$4,$5,$6,$7,$8);
    $m = $mo{lc($m)};
    if (!defined($m)) {
      printf STDERR ("%d invalid month: %s\n", $recno,$line);
      next;
    }
    next  if $proc ne 'amavis';

    # Optional "(am_id) " prefix on the message.
    my($am_id);
    $am_id = $1  if s/^\( ( [^)]* ) \) \s+//x;

    # Key under which a partial entry of this process is kept; the pid is
    # optional in the syslog prefix, so an absent pid becomes ''.
    my($key) = join('/', $host, $proc, defined($pid) ? $pid : '');

    # A message starting with "..." continues the previous partial entry
    # of the same host/process/pid.
    if (/^\.\.\.(.*)\z/s) {
      my($tail) = $1;
      if (!exists($contin{$key})) {
        printf STDERR ("%d NOTHING TO APPEND TO: %s\n", $recno,$line);
        $contin{$key} = '';
      }
      $_ = $contin{$key} . $tail;
      $contin{$key} = $_;
    }

    # A long raw line whose message ends in "..." is itself to be
    # continued: remember what has been collected so far and wait.
    if (length($line) > 1000 && /^(.*)\.\.\.\z/s) {
      $contin{$key} = $1;
      next;
    }

    # The entry is complete; drop any partial text kept for this key.
    delete $contin{$key};

    if (length($_) > 16000) {
      printf STDERR ("%d giant log entry %d chars\n", $recno,length($_));
      next;
    }
    s/^\( !+ \)//x;  # warnings marker
    next if /^SA (?:warn|error|failed):/;
    next if !defined($am_id) || $am_id eq '';

    # Only TIMING reports are of interest; the size prefix is optional.
    next if !s/^(?:size: (\d+), )?TIMING \[total (\d+) ms\] - //;
    my($size, $t_total) = ($1,$2);

    # What remains is a ", "-separated list of "name: ms (pct%)cum"
    # sections; keep the millisecond figure of each, keyed by name.
    my(%t_fields);
    for my $field (split(/, /)) {
      $t_fields{$1} = $2
        if $field =~ /^([^:]*):\s+(\d+)\s+\((\d+)%\)(\d+)\z/;
    }

    # DateTime->new throws on an impossible date or time (e.g. Feb 30);
    # trap that so a single corrupt line cannot abort the whole run.
    my($dt) = eval {
      DateTime->new(year=>$y, month=>$m, day=>$d,
                    hour=>$hh, minute=>$mm, second=>$ss);
    };
    if (!defined($dt)) {
      printf STDERR ("%d invalid timestamp: %s\n", $recno,$line);
      next;
    }
    # NOTE(review): no time_zone is passed, so the epoch is presumably
    # computed as if the logged local time were UTC - confirm if absolute
    # times matter.
    my($utc) = $dt->epoch;

    # Missing sections become 0 rather than an empty string, which would
    # shift the later columns for whitespace-splitting consumers.
    my(@columns) = map { defined($_) ? $_ : 0 }
                   @t_fields{'SMTP greeting', 'SMTP DATA', 'digest_body_dkim',
                             'AV-scan-1', 'SA check'};

    printf("%d %d %d %s\n", $utc, $size||0, $t_total, join(' ', @columns));
  }
