#!/usr/bin/perl -w
# 
# timeworked.pl
# Version 0.03  2009-08-21
#
# Fran Fabrizio, UAB CIS, fran@cis.uab.edu
# 
# Assistance From / Thanks to:
#   Drew Barnes, barnesaw@ucrwcu.rwc.uc.edu
#
# timeworked.pl is an RT reporting tool which reports minutes 
# worked per person per ticket in a given time period for one or
# more queues.
#
# ITEMS YOU MUST CHANGE IN THIS SCRIPT BEFORE USING
#  - Currently, the only thing that needs to be changed is the "use lib"
#    line (line 117 at the time of this writing) in order to point to your
#    local RT install. 
#
# SCRIPT REQUIREMENTS
#  - This script relies on the following modules:
#    RT, RT::Tickets, RT::Queues, RT::Transactions, Getopt::Long,
#    Time::Local, and Time::Zone
#
# FEATURES
#  - Produces a simple text report of the number of minutes worked per 
#    person per ticket in a given datetime range for all tickets that
#    experienced > 0 minutes worked in that time period
#  - Allows user to specify one or more queues to include in the ticket 
#    search
#  - Assumes start and end datetimes entered in local time and adjusts for
#    GMT which is RT's internal representation (but see --gmtime flag)
#  - Correctly calculates time one person puts onto another person's ticket
#  - Correctly calculates TimeWorked when directly updated via the Basics or
#    Jumbo ticket update page as well as when added to a comment or reply
#  - Optional --debug mode for verbose output
#  - Basic --help text display
#  - Uses RT's API (does not query database directly)
#
# GENERAL IMPLEMENTATION STRATEGY
#
# I took a transaction-based approach.  I use a SearchBuilder to grab all
# of the transactions that took place in the time period of interest (see TIME
# VALUES IN RT / HOW TO SPECIFY DATE RANGES below for time zone issue discussion).  
# I then look at each one and go through the following workflow:
#
#   1. Does this transaction represent a time worked modification?
#      (see INCONSISTENT RECORDING OF TIME WORKED below for issues here)  If no, 
#      skip to next transaction.
#   2. Use this transaction's object ID to retrieve the associated ticket object.
#      If this fails, report the error and skip to next transaction.
#   3. Is this ticket in a queue of interest? If no, skip to next transaction.
#   4. Was this transaction entered by the user RT_System?  If so, it is the result
#      of a merge operation.  Don't count this time and skip to the next transaction.
#      (The time will be captured elsewhere, under the original user that entered it, 
#      if it originally occurred in this time period of interest).
#   
#   Ok, if we got this far, we have real time to account for.
#
#   5. If this is the first time we are seeing this person for this ticket, create the
#      multilevel data structure to store ticket and time worked info.  
#   6. Increment the time worked value (see INCONSISTENT RECORDING OF TIME WORKED below 
#      for issues here).
#     
# TIME VALUES IN RT / HOW TO SPECIFY DATE RANGES
#
# Internally in the database, RT stores time values as gmtime.  This has implications
# for this script, which are best illustrated by an example.
#
# I live in the US/Central time zone which is -18000 seconds, or -5 hours, off from GM time.
# I also like to work late at night.  If I enter some time worked onto a ticket at 11pm on 
# August 12th, or more formally, at "2009-08-12 23:00:00" in US Central time, that will be
# entered in the transaction table with a timestamp of "2009-08-13 04:00:00".
#
# Now if I run this script with a startdate of "2009-08-12 00:00:00" and an enddate of 
# "2009-08-12 23:59:59", I would reasonably expect to get all of the time I worked on 
# August 12th, 2009.  However, I would miss the last 5 hours in the day worth of transactions, 
# because RT would have internally shifted the stored timestamp to a time in August 13th (which
# it already was, over in London!)
#
# So, I assume that users of this script will be entering their time in their local location, and
# automatically adjust for this.  In my case, this means when I enter a start or end date like 
# "2009-08-12 00:00:00", this script will convert that to "2009-08-12 05:00:00".
#
# Unrelated to this issue but also relevant to the area of time values, MySQL will treat the absence
# of the HH:MM:SS as 00:00:00.  
#
# In sum, if you want all the time worked for a week, let's say, from Sunday, August 9th through 
# Saturday, August 15th (inclusive), if you use the start and end values "2009-08-09" and "2009-08-16"
# (Note: 16 not 15!) this script will do the right thing.  To be more clear, you might use "2009-08-09"
# and "2009-08-15 23:59:59", but I am lazy and don't mind counting the first second of the 16th as part
# of the 15th. :-)
#   
# INCONSISTENT RECORDING OF TIME WORKED
# 
# Time can be entered on tickets in two main ways.
#
#  1. Putting a value in the Time Worked field as part of a Comment or Correspondence transaction.
#  2. Directly editing the Time Worked field as part of a Basic or Jumbo ticket update.
#
# Unfortunately, the way that the time gets recorded is different for each scenario.
#
# For scenario 1, in the transaction table the transaction will store the new time worked in the 
# TimeTaken field as minutes.
#
# For scenario 2, in the transaction table the transaction will use the OldValue and NewValue fields
# to store the old and new values.
#
# So, the script needs to check for both cases, and in scenario 2, has to do a little math to figure 
# out how much time was added. 
#

# OK, enough talk, away we go...

use strict;

# CHANGE THE FOLLOWING TO POINT TO YOUR RT INSTALL 
use lib qw(/usr/local/rt/local/lib /usr/local/rt/lib);

# NO EDITS REQUIRED BELOW THIS LINE

use Getopt::Long;
use Time::Local;
use Time::Zone;

# command line options
my ($startdate, $enddate, $queues, $debug, $gmtime, $byticket, $help);
GetOptions("startdate|s=s", \$startdate,
           "enddate|e=s", \$enddate,
           "queues|q=s", \$queues,
           "debug|d", \$debug,
           "gmtime|gmt", \$gmtime,
           "byticket", \$byticket,
           "help|h|?", \$help,
          );

##### command line validation and processing #####
if ($help) {
  print <<EndHELP;

Usage: timeworked.pl --startdate YYYY-MM-DD [HH:MM:SS] --enddate YYYY-MM-DD [HH:MM:SS] --queues queue1[,queue2,...] [--debug] [--gmtime] [--byticket] [--help]

Generate a text report of minutes worked per person per ticket in between startdate and enddate for an RT ticket system.

Example:
  ./timeworked.pl --startdate 2009-05-25 --enddate "2009-05-28 12:00:00" --queues Helpdesk,NetworkIncident --debug

Required parameters:
  --startdate|-s YYYY-MM-DD [HH:MM:SS] - Starting date and time of period of interest
  --enddate|-w YYYY-MM-DD [HH:MM:SS] - Ending date and time of period of interest
  --queues|-q queue1[,queue2,queue3,...] - Comma-separated list of queue names

Optional parameters:
  --debug|-d - Turns on verbose output.  Script can take a while to run, so --debug is useful to make sure its not hung.
  --gmtime|-gmt - startdate and enddate are already in GMT (otherwise assumes local time and adjusts for GMT offset since database stores in GMT).
  --byticket - Organizes the output by ticket, instead of by owner.  If a ticket was worked by multiple people, the breakdown will be displayed under the total for each ticket. 
  --help|-h|-? - Display this help text and exit.

EndHELP
  exit;
}

if (!$startdate || !$enddate) {
  die "FATAL: Must provide --startdate and --enddate.  Run timeworked.pl --help for more info.\n";
}

# regex for valid date specifications
my $datepattern = '(\d\d\d\d)-(\d\d)-(\d\d)(?: (\d\d):(\d\d):(\d\d))?';

# fields we will parse out of dates
my ($year,$mon,$day,$hour,$min,$sec) = (0,0,0,0,0,0);

# GMT offset calculation using Time::Local
my $offset = 0;
unless ($gmtime) {
  $offset = tz_local_offset;
}
debug("Time zone offset is $offset.\n");

# is startdate valid?
unless (($year,$mon,$day,$hour,$min,$sec) = $startdate =~ /$datepattern/) {
  die "FATAL: --startdate and --enddate must have format YYYY-MM-DD  [HH:MM:SS].  Run timeworked.pl --help for more info.\n";
}
# form the correct string for the database column 
my $startstring = form_date_string($year, $mon, $day, $hour, $min, $sec, $offset);
debug("Using starting date string $startstring\n");

# reset the date parsing variables
($year,$mon,$day,$hour,$min,$sec) = (0,0,0,0,0,0);

# is enddate valid?
unless (($year,$mon,$day,$hour,$min,$sec) = $enddate =~ /$datepattern/) { 
  die "FATAL: --startdate and --enddate must have format YYYY-MM-DD [HH:MM:SS].  Run timeworked.pl --help for more info.\n";
}

# form the correct date string for the database column
my $endstring = form_date_string($year, $mon, $day, $hour, $min, $sec, $offset);
debug("Using ending date string $endstring\n");

# process queue(s) list
if (!$queues) {
  die "FATAL: Must provide at least one queue via the --queue parameter (separate multiple queues by a comma e.g. queue1,queue2).  Run timeworked.pl --help for more info.\n";
}
##### end command line validation and processing #####

# Fix up include path
# this may not be necessary, but seemed prudent at the time
# stolen shamelessly from rt-crontool
BEGIN {
    require File::Spec;
    my @libs = ("lib", "local/lib");
    my $bin_path;

    for my $lib (@libs) {
        unless ( File::Spec->file_name_is_absolute($lib) ) {
            unless ($bin_path) {
                if ( File::Spec->file_name_is_absolute(__FILE__) ) {
                    $bin_path = ( File::Spec->splitpath(__FILE__) )[1];
                }
                else {
                    require FindBin;
                    no warnings "once";
                    $bin_path = $FindBin::Bin;
               }
            }
            $lib = File::Spec->catfile( $bin_path, File::Spec->updir, $lib );
        }
        unshift @INC, $lib;
    }

}

use RT;
use RT::Tickets;
use RT::Queues;
use RT::Transactions;

# Load the config file
RT::LoadConfig();

# Connect to the database and get RT::SystemUser loaded
RT::Init();

# Figure out which Queue IDs we have
my %qid;
my $qlist = new RT::Queues($RT::SystemUser);
for (split /,/, $queues) {
  $qlist->Limit(FIELD => "Name", VALUE => $_, ENTRYAGGREGATOR => 'OR');
}
while (my $q = $qlist->Next) {
  $qid{$q->id} = 1;
}

# hash to hold statistics
# %stats will be a multilevel hash - first level keys are the usernames, second level keys are 
# the ticket IDs, and for each ticket, we store an anonymous hash with keys Subject and TimeWorked
# (this implies that a single ticket can live under two+ users if they both worked the ticket)
my %stats;

# var to hold transaction search results
my $trans;

# Get a new transactions object to hold transaction search results for this ticket
$trans = new RT::Transactions($RT::SystemUser);

# only in the period of interest
$trans->Limit(FIELD => 'Created', OPERATOR => '>', VALUE => "$startstring");
$trans->Limit(FIELD => 'Created', OPERATOR => '<', VALUE => "$endstring", ENTRYAGGREGATOR => 'AND');

# now start counting all the TimeTaken by examining transactions associated with this ticket
while (my $tr = $trans->Next) {
  # did this transaction take any time?
  if (($tr->TimeTaken != 0) || ($tr->Field eq 'TimeWorked')) {
    # Got a hot one - what ticket is this?
    my $t = new RT::Ticket($RT::SystemUser);
    $t->Load($tr->ObjectId);

    if (!$t) {
      debug("Failed to retrieve ticket " . $tr->ObjectId . " from Transaction " . $tr->id . "\n");
      next;
    } else {
      debug("Retrieved object for ticket " . $tr->ObjectId . " from Transaction " . $tr->id . "\n");
      if (!exists($qid{$t->Queue})) {
        debug("Ticket " . $t->id . " is not in queue of interest.\n");
        next;
      } else {
        debug("Ticket " . $t->id . " is in a queue of interest.\n");
      }
    }

    # If this is time logged by RT_System, it's the result of a ticket merge
    # In order to avoid double-counting minutes in --byticket mode, or the less serious
    # issue of displaying a report for user RT_System in normal mode, we skip this entirely
    if ($tr->CreatorObj->Name eq 'RT_System') {
      next;
    }

    # we've got some time to account for
    
    # is this the first time this person is charging time to this ticket?
    # if so, add this ticket to their list
    if ($byticket) {
      if (!exists($stats{$t->id})) {
        $stats{$t->id}{Subject} = $t->Subject;
      }
    } else {
      if (!exists($stats{$tr->CreatorObj->Name}{$t->id}{Subject})) {
        $stats{$tr->CreatorObj->Name}{$t->id}{Subject} = $t->Subject;
      }
    }
    
    if ($tr->TimeTaken != 0) {
      # this was a comment or correspondence where the user also added some time worked
      # value of interest appears in Transaction's TimeTaken column
      if ($byticket) {
        # increment total time taken on this ticket
        $stats{$t->id}{TimeWorked} += $tr->TimeTaken;
        # increment time taken for this individual on this ticket  
        $stats{$t->id}{People}{$tr->CreatorObj->Name}{TimeWorked} += $tr->TimeTaken;  
      } else {
        $stats{$tr->CreatorObj->Name}{$t->id}{TimeWorked} += $tr->TimeTaken;
      }
      debug("Ticket " . $t->id . " TimeTaken recorded " . $tr->TimeTaken . " at time " . $tr->Created . ".\n");
    } else {
      # this was a direct update of the time worked field from the Basics or Jumbo ticket update page
      # values of interest appear in Transaction's OldValue and NewValue columns
      # For some reason, RT does not use the TimeTaken column in this instance.
      if ($byticket) {
        # increment total time taken on this ticket
        $stats{$t->id}{TimeWorked} += $tr->NewValue - $tr->OldValue;  
        # increment time taken for this individual on this ticket
        $stats{$t->id}{People}{$tr->CreatorObj->Name}{TimeWorked} += $tr->NewValue - $tr->OldValue;          
      } else {
        $stats{$tr->CreatorObj->Name}{$t->id}{TimeWorked} += $tr->NewValue - $tr->OldValue;
      }
      debug ("Ticket " . $t->id . " TimeWorked field updated from " . $tr->OldValue . " to " . $tr->NewValue . " at time " . $tr->Created . ".\n");
    }
  }
}

# rudimentary text report starts here
# default is sorted first by username alphabetically, then within each user by most worked to least worked
# --byticket changes this

print "\n\n***** TIME WORKED REPORT FOR PERIOD $startdate TO $enddate *****\n\n";

if ($byticket) {
  for my $tid (sort {$stats{$b}{TimeWorked} <=> $stats{$a}{TimeWorked}} keys %stats) {
    printf("%d: %s\n",$tid,$stats{$tid}{Subject});
    printf("  %dm (%.1fh) TOTAL TIME\n", $stats{$tid}{TimeWorked},($stats{$tid}{TimeWorked} / 60));
    for my $person (sort {$stats{$tid}{People}{$b}{TimeWorked} <=> $stats{$tid}{People}{$a}{TimeWorked}} keys %{$stats{$tid}{People}}) {
      my $minutes = $stats{$tid}{People}{$person}{TimeWorked};
      printf("    %dm (%.1fh) %s\n",$minutes,($minutes/60),$person);
    }
    print "\n";
  }
} else {
  for my $person (sort keys %stats) {
    print "PERSON: $person\n";
    print " MINUTES    HOURS  TICKET\n";
    my $totalMinutes = 0;
    for my $tid (sort {$stats{$person}{$b}{TimeWorked} <=> $stats{$person}{$a}{TimeWorked}} keys %{$stats{$person}}) {
      my $minutes = $stats{$person}{$tid}{TimeWorked};
      my $subject = $stats{$person}{$tid}{Subject};
      printf("%8d %8.1f  %s\n",$minutes,($minutes/60),"$tid: $subject");
      $totalMinutes += $minutes;
    }
    print "-------- --------  TOTALS\n";
    printf("%8d %8.1f\n",$totalMinutes,($totalMinutes/60));
    print "\n";
  }
}

##### helper functions below

sub debug {
  if ($debug) {
    print "DEBUG: " . shift;
  }
}

sub form_date_string {
  # expects seven input params - year, month, day, hour, minute, second, offset
  my $year = $_[0] - 1900;
  my $mon = $_[1] - 1;
  my $day = $_[2];
  my $hour = $_[3] ? $_[3] : 0;
  my $min = $_[4] ? $_[4] : 0;
  my $sec = $_[5] ? $_[5] : 0;
  my $offset = $_[6] ? $_[6] : 0;

  # convert to seconds since epoch, then adjust for the $offset, which is also in seconds
  # we do this so we don't have to do fancy date arithmetic - we can just subtract one seconds
  # value from the other seconds value
  my $starttime = timelocal($sec,$min,$hour,$day,$mon,$year) - $offset;
  
  # convert back to component parts now that we've adjusted for offset
  # this gives us the components which represent the GMT time for the local time that was entered
  # on the command line
  ($sec,$min,$hour,$day,$mon,$year) = localtime($starttime);

  # format the date string, padding with zeros if needed
  return sprintf("%04d-%02d-%02d %02d:%02d:%02d", ($year+1900), ($mon+1), $day, $hour, $min, $sec);
}

#use Data::Dumper;
#print Dumper($tickets->_Handle->SQLStatementLog);

