Awight has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/220714

Change subject: WIP Minimum split of BaseAuditProcessor from functions that 
don't require context
......................................................................

WIP Minimum split of BaseAuditProcessor from functions that don't require 
context

TODO:
* Define interface that must be implemented by subclasses.
* Port WorldPay and GlobalCollect.

Change-Id: I60da6b9ca7f542a32cf01ff8446ac7e68f866e6e
---
A sites/all/modules/wmf_audit/BaseAuditProcessor.php
M sites/all/modules/wmf_audit/wmf_audit.info
M sites/all/modules/wmf_audit/wmf_audit.module
3 files changed, 837 insertions(+), 824 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/fundraising/crm 
refs/changes/14/220714/1

diff --git a/sites/all/modules/wmf_audit/BaseAuditProcessor.php 
b/sites/all/modules/wmf_audit/BaseAuditProcessor.php
new file mode 100644
index 0000000..b470f7d
--- /dev/null
+++ b/sites/all/modules/wmf_audit/BaseAuditProcessor.php
@@ -0,0 +1,830 @@
+<?php
+
+class BaseAuditProcessor {
+    protected $options;
+
+    public function __construct( $options ) {
+       $this->options = $options;
+       // FIXME: Copy to confusing global thing.
+       wmf_audit_runtime_options( $options );
+    }
+
+    protected function get_runtime_options( $name ) {
+       if ( isset( $this->options[$name] ) ) {
+           return $this->options[$name];
+       } else {
+           return null;
+       }
+    }
+
+    /**
+     * The main function, intended to be called straight from the drush 
command and
+     * nowhere else.
+     */
+    public function run() {
+       civicrm_initialize();
+
+       //make sure all the things we need are there.
+       if (!$this->setup_required_directories()) {
+         throw new Exception( 'Missing required directories' );
+       }
+
+       //find out what the situation is with the available recon files, by date
+       $recon_files = $this->get_all_recon_files();
+
+       //get missing transactions from one or more recon files
+       //let's just assume that the default mode will be to pop off the top 
three (most recent) at this point. :)
+       //...Three, because Shut Up.
+       $count = count($recon_files);
+       if ($count > 3 && !$this->get_runtime_options('run_all')) {
+         $count = 3;
+       }
+
+       $total_missing = array();
+       $recon_file_stats = array();
+
+       for ($i = 0; $i < $count; ++$i) {
+         $parsed = array();
+         $missing = array();
+
+         //parce the recon files into something relatively reasonable.
+         $file = array_pop($recon_files);
+         wmf_audit_echo("Parsing $file");
+         $start_time = microtime(true);
+         $parsed = $this->parse_recon_file( $file );
+         $time = microtime(true) - $start_time;
+         wmf_audit_echo(count($parsed) . " results found in $time seconds\n");
+
+         //remove transactions we already know about
+         $start_time = microtime(true);
+         $missing = $this->get_missing_transactions($parsed);
+         $recon_file_stats[$file] = wmf_audit_count_missing($missing);
+         $time = microtime(true) - $start_time;
+         wmf_audit_echo(wmf_audit_count_missing($missing) . ' missing 
transactions (of a possible ' . count($parsed) . ") identified in $time 
seconds\n");
+
+         //If the file is empty, move it off.
+         // Note that we are not archiving files that have missing 
transactions,
+         // which might be resolved below.  Those are archived on the next run,
+         // once we can confirm they have hit Civi and are no longer missing.
+         if (wmf_audit_count_missing($missing) <= 
$this->get_runtime_options('recon_complete_count')) {
+           wmf_audit_move_completed_recon_file($file, 
$this->get_recon_completed_dir());
+         }
+
+         //grumble...
+         if (!empty($missing)) {
+           foreach ($missing as $type => $data) {
+             if (!empty($missing[$type])) {
+               if (array_key_exists($type, $total_missing)) {
+                 $total_missing[$type] = array_merge($total_missing[$type], 
$missing[$type]);
+               } else {
+                 $total_missing[$type] = $missing[$type];
+               }
+             }
+           }
+         }
+       }
+       $total_missing_count = wmf_audit_count_missing($total_missing);
+       wmf_audit_echo("$total_missing_count total missing transactions 
identified at start");
+
+       //get the date distribution on what's left... for ***main transactions 
only***
+       //That should be to say: The things that are totally in the payments 
logs.
+       //Other things, we will have to look other places, or just rebuild.
+       $missing_by_date = array();
+       if (array_key_exists('main', $total_missing) && 
!empty($total_missing['main'])) {
+         foreach ($total_missing['main'] as $record) {
+           $missing_by_date[$this->get_record_human_date( $record )][] = 
$record;
+         }
+       }
+
+
+       $remaining = null;
+       if (!empty($missing_by_date)) {
+         $remaining['main'] = $this->log_hunt_and_send($missing_by_date);
+       }
+
+       //@TODO: Handle the recurring type, once we have a gateway that gives 
some of those to us.
+       //
+       //Handle the negatives now. That way, the parent transactions will 
probably exist.
+       wmf_audit_echo("Processing 'negative' transactions");
+       $neg_count = 0;
+
+       if (array_key_exists('negative', $total_missing) && 
!empty($total_missing['negative'])) {
+         foreach ($total_missing['negative'] as $record) {
+           //check to see if the parent exists. If it does, normalize and send.
+           if (worldpay_audit_main_transaction_exists_in_civi($record)) {
+             $normal = $this->normalize_negative( $record );
+             if (wmf_audit_send_transaction($normal, 'negative')) {
+               $neg_count += 1;
+               wmf_audit_echo('!');
+             }
+             wmf_audit_echo('X');
+           } else {
+             //@TODO: Some version of makemissing should make these, too. Gar.
+             $remaining['negative'][$this->get_record_human_date( $record )][] 
= $record;
+             wmf_audit_echo('.');
+           }
+         }
+         wmf_audit_echo("Processed $neg_count 'negative' transactions\n");
+       }
+
+       //Wrap it up and put a bow on it.
+       //@TODO much later: Make a fredge table for these things and dump some 
messages over there about what we just did.
+       $missing_main = 0;
+       $missing_negative = 0;
+       $missing_recurring = 0;
+       $missing_at_end = 0;
+       if (is_array($remaining) && !empty($remaining)) {
+         foreach ($remaining as $type => $data) {
+           $count = wmf_audit_count_missing($data);
+           ${'missing_' . $type} = $count;
+           $missing_at_end += $count;
+         }
+       }
+
+       $wrap_up = "\nDone! Final stats:\n";
+       $wrap_up .= "Total missing at start: $total_missing_count\n";
+       $wrap_up .= 'Missing at end: ' . $missing_at_end . "\n\n";
+
+       if ($missing_at_end > 0) {
+         $wrap_up .= "Missing transaction summary:\n";
+         $wrap_up .= "Regular donations: $missing_main\n";
+         if ($missing_main > 0) {
+           foreach ($remaining['main'] as $date => $missing) {
+             if (count($missing) > 0) {
+               $wrap_up .= "\t$date: " . count($missing) . "\n";
+             }
+           }
+         }
+         $wrap_up .= "Refunds and chargebacks: $missing_negative\n";
+         if ($missing_negative > 0) {
+           foreach ($remaining['negative'] as $date => $missing) {
+             if (count($missing) > 0) {
+               $wrap_up .= "\t$date: " . count($missing) . "\n";
+             }
+           }
+         }
+         $wrap_up .= "Recurring donations: $missing_recurring\n";
+         if ($missing_recurring > 0) {
+           foreach ($remaining['recurring'] as $date => $missing) {
+             if (count($missing) > 0) {
+               $wrap_up .= "\t$date: " . count($missing) . "\n";
+             }
+           }
+         }
+
+         $wrap_up .= "Transaction IDs:\n";
+         foreach ($remaining as $group => $transactions) {
+           foreach ($transactions as $date => $missing) {
+             foreach ($missing as $transaction) {
+               $wrap_up .= "\t" . 
WmfTransaction::from_message($transaction)->get_unique_id() . "\n";
+             }
+           }
+         }
+
+         $wrap_up .= 'Initial stats on recon files: ' . 
print_r($recon_file_stats, true) . "\n";
+       }
+
+       wmf_audit_echo($wrap_up);
+    }
+
+    /**
+     * Returns an array of the full paths to all valid reconciliation files
+     * @return array Full paths to all recon files
+     */
+    function get_all_recon_files() {
+      $files_directory = $this->get_recon_dir();
+      //foreach file in the directory, if it matches our pattern, add it to 
the array.
+      $files = array();
+      if ($handle = opendir($files_directory)) {
+       while (( $file = readdir($handle) ) !== false) {
+         if ($this->get_filetype($file) === 'recon') {
+           $filedate = $this->get_recon_file_date( $file ); //which is not the 
same thing as the edited date... probably.
+           $files[$filedate] = $files_directory . $file;
+         }
+       }
+       closedir($handle);
+       ksort($files);
+       return $files;
+      } else {
+       //can't open the directory at all. Problem.
+       wmf_audit_log_error("Can't open directory $files_directory", 
'FILE_DIR_MISSING'); //should be fatal
+      }
+      return false;
+    }
+
+    /**
+     * Go transaction hunting in the payments logs. This is by far the most
+     * processor-intensive part, but we have some timesaving new near-givens 
to work
+     * with.
+     * Things to remember:
+     * The date on the payments log, probably doesn't contain much of that 
actual
+     * date. It's going to be the previous day, mostly.
+     * Also, remember logrotate exists, so it might be the next day before we 
get
+     * the payments log we would be most interested in today.
+     *
+     * @param array $missing_by_date An array of all the missing transactions 
we
+     * have pulled out of the nightlies, indexed by the standard WMF date 
format.
+     * @return mixed An array of transactions we couldn't find or deal with (by
+     * date), or false on error
+     */
+    function log_hunt_and_send($missing_by_date) {
+
+      if (empty($missing_by_date)) {
+       wmf_audit_echo(__FUNCTION__ . ': No missing transactions sent to this 
function. Aborting.');
+       return false;
+      }
+
+      ksort($missing_by_date);
+
+      //output the initial counts for each index...
+      $earliest = null;
+      $latest = null;
+      foreach ($missing_by_date as $date => $data) {
+       if (is_null($earliest)) {
+         $earliest = $date;
+       }
+       $latest = $date;
+       wmf_audit_echo($date . " : " . count($data));
+      }
+      wmf_audit_echo("\n");
+
+
+      //REMEMBER: Log date is a liar!
+      //Stepping backwards, log date really means "Now you have all the data 
for
+      //this date, and some from the previous."
+      //Stepping forwards, it means "Now you have all the data from the 
previous
+      //date, and some from the next."
+      //
+      //Come up with the full range of logs to grab
+      //go back the number of days we have configured to search in the past 
for the
+      //current gateway
+      $earliest = wmf_common_date_add_days($earliest, -1 * 
$this->get_log_days_in_past());
+
+      //and add one to the latest to compensate for logrotate... unless that's 
the future.
+      $today = wmf_common_date_get_today_string();
+      $latest = wmf_common_date_add_days($latest, 1);
+      if ($today < $latest) {
+       $latest = $today;
+      }
+
+      //Correct for the date gap function being exclusive on the starting date 
param
+      //More explain above.
+      $earliest -= 1;
+
+      //get the array of all the logs we want to check
+      $logs_to_grab = wmf_common_date_get_date_gap($earliest, $latest);
+
+      if (empty($logs_to_grab)) {
+       wmf_audit_log_error(__FUNCTION__ . ': No logs identified as grabbable. 
Aborting.', 'RUNTIME_ERROR');
+       return false;
+      }
+
+      //want the latest first, from now on.
+      rsort($logs_to_grab);
+      krsort($missing_by_date);
+
+      //Foreach log by date DESC, check all the transactions we are missing 
that might possibly be in this log.
+      //This is going to look a little funny, because the logs are date-named 
and stamped after the day they rotate; Not after the dates for all the data in 
them.
+      //As such, they mostly contain data for the previous day (but not 
exclusively, and not all of it)
+      $tryme = array();
+      foreach ($logs_to_grab as $log_date) {
+       //Add to the pool of what's possible to find in this log, as we step 
backward through the log dates.
+       //If the log date is less than or equal to the date on the transaction
+       //(which may or may not be when it was initiated, but that's the 
past-iest
+       //option), and it hasn't already been added to the pool, add it to the 
pool.
+       //As we're stepping backward, we should look for transactions that come
+       //from the current log date, or the one before.
+       foreach ($missing_by_date as $date => $data) {
+         if ($date >= ($log_date - 1)) {
+           if (!array_key_exists($date, $tryme)) {
+             wmf_audit_echo("Adding date $date to the date pool for log date 
$log_date");
+             $tryme[$date] = $data;
+           }
+         } else {
+           break;
+         }
+       }
+
+       //log something sensible out for what we're about to do
+       $display_dates = array();
+       if (!empty($tryme)) {
+         foreach ($tryme as $date => $thing) {
+           if (count($thing) > 0) {
+             $display_dates[$date] = count($thing);
+           }
+         }
+       }
+       $log = false;
+       if (!empty($display_dates)) {
+         $message = "Checking log $log_date for missing transactions that came 
in with the following dates: ";
+         foreach ($display_dates as $display_date => $local_count) {
+           $message .= "\n\t$display_date : $local_count";
+         }
+         wmf_audit_echo($message);
+
+         //now actually check the log from $log_date, for the missing 
transactions in $tryme
+         // Get the prepped log with the current date, returning false if it's 
not there.
+         $log = $this->get_log_by_date($log_date);
+       }
+
+       if ($log) {
+         //check to see if the missing transactions we're trying now, are in 
there.
+         //Echochar with results for each one.
+         foreach ($tryme as $date => $missing) {
+           if (!empty($missing)) {
+             wmf_audit_echo("Log Date: $log_date: About to check " . 
count($missing) . " missing transactions from $date", true);
+             $checked = 0;
+             $found = 0;
+             foreach ($missing as $id => $transaction) {
+               $checked += 1;
+               //reset vars used below, for extra safety
+               $order_id = false;
+               $data = false;
+               $all_data = false;
+               $contribution_tracking_data = false;
+               $error = false;
+
+               $order_id = $this->get_order_id( $transaction );
+               if (!$order_id) {
+                 $error = array(
+                   'message' => 'Could not get an order id for the following 
transaction ' . print_r($transaction, true),
+                   'code' => 'MISSING_MANDATORY_DATA',
+                 );
+               } else {
+                 //@TODO: If you ever have a gateway that doesn't communicate 
with xml, this is going to have to be abstracted slightly.
+                 //Not going to worry about that right now, though.
+                 $data = $this->get_xml_log_data_by_order_id($order_id, $log);
+               }
+
+               //if we have data at this point, it means we have a match in 
the logs
+               if ($data) {
+                 $found += 1;
+                 $all_data = $this->normalize_and_merge_data( $data, 
$transaction );
+                 if (!$all_data) {
+                   $error = array(
+                     'message' => 'Error normalizing data. Skipping the 
following: ' . print_r($transaction, true) . "\n" . print_r($data, true),
+                     'code' => 'NORMALIZE_DATA',
+                   );
+                 }
+                 if (!$error) {
+                   //lookup contribution_tracking data, and fill it in with 
audit markers if there's nothing there.
+                   $contribution_tracking_data = 
wmf_audit_get_contribution_tracking_data($all_data);
+                 }
+
+                 if (!$contribution_tracking_data) {
+                   $error = array(
+                     'message' => 'No contribution trackin data retrieved for 
transaction ' . print_r($all_data, true),
+                     'code' => 'MISSING_MANDATORY_DATA',
+                   );
+                 }
+
+                 if (!$error) {
+                   //Now that we've made it this far: Easy check to make sure 
we're even looking at the right thing...
+                   //I'm not totally sure this is going to be the right thing 
to do, though. Intended fragility.
+                   if ((!$this->get_runtime_options('fakedb')) &&
+                     
(!empty($contribution_tracking_data['utm_payment_method'])) &&
+                     ($contribution_tracking_data['utm_payment_method'] !== 
$all_data['payment_method'])) {
+
+                     $message = 'Payment method mismatch between utm tracking 
data(' . $contribution_tracking_data['utm_payment_method'];
+                     $message .= ') and normalized log and recon data(' . 
$all_data['payment_method'] . '). Investigation required.';
+                     $error = array(
+                       'message' => $message,
+                       'code' => 'UTM_DATA_MISMATCH',
+                     );
+                   } else {
+                     unset($contribution_tracking_data['utm_payment_method']);
+                     // On the next line, the date field from all_data will 
win, which we totally want.
+                     // I had thought we'd prefer the contribution tracking 
date, but that's just silly.
+                     // However, I'd just like to point out that it would be 
terribly enlightening for some gateways to log the difference...
+                     // ...but not inside the char block, because it'll break 
the pretty.
+                     $all_data = array_merge($contribution_tracking_data, 
$all_data);
+                   }
+
+                   if (!$error) {
+                     //Send to stomp. Or somewhere. Or don't (if it's test 
mode).
+                     wmf_audit_send_transaction($all_data, 'main');
+                     unset($tryme[$date][$id]);
+                     wmf_audit_echo('!');
+                   }
+                 }
+
+               } else {
+                 //no data found in this log, which is expected and normal and 
not a problem.
+                 wmf_audit_echo('.');
+               }
+
+               //End of the transaction search/destroy loop. If we're here and 
have
+               //an error, we found something and the re-fusion didn't work.
+               //Handle consistently, and definitely don't try looking in other
+               //logs.
+               if (is_array($error)) {
+                 wmf_audit_log_error($error['message'], $error['code']);
+                 unset($tryme[$date][$id]);
+                 wmf_audit_echo('X');
+               }
+             }
+             wmf_audit_echo("Log Date: $log_date: Checked $checked missing 
transactions from $date, and found $found\n");
+           }
+         }
+       }
+      }
+
+      //That loop has been stepping back in to the past. So, use what we 
have...
+      wmf_audit_remove_old_logs($log_date, $this->read_working_logs_dir());
+
+      //if we are running in makemissing mode: make the missing transactions.
+      if ($this->get_runtime_options('makemissing')) {
+       $missing_count = wmf_audit_count_missing($tryme);
+       if ($missing_count === 0) {
+         wmf_audit_echo('No further missing transactions to make.');
+       } else {
+         //today minus three. Again: The three is because Shut Up.
+         wmf_audit_echo("Making up to $missing_count missing transactions:");
+         $made = 0;
+         $cutoff = 
wmf_common_date_add_days(wmf_common_date_get_today_string(), -3);
+         foreach ($tryme as $date => $missing) {
+           if ((int) $date <= (int) $cutoff) {
+             foreach ($missing as $id => $message) {
+               $contribution_tracking_data = 
wmf_audit_make_contribution_tracking_data($message);
+               $all_data = array_merge($message, $contribution_tracking_data);
+               $sendme = $this->normalize_partial( $all_data );
+               wmf_audit_send_transaction($sendme, 'main');
+               $made += 1;
+               wmf_audit_echo('!');
+               unset($tryme[$date][$id]);
+             }
+           }
+         }
+         wmf_audit_echo("Made $made missing transactions\n");
+       }
+      }
+
+      return $tryme; //this will contain whatever's left, if we haven't 
errored out at this point
+    }
+
+    /**
+     * Both groom and return a distilled working payments log ready to be 
searched
+     * for missing transaction data
+     * @param string $date The date of the log we want to grab
+     */
+    function get_log_by_date($date) {
+      //Could be distilled already.
+      //Could be either in .gz format in the archive
+      //check for the distilled version first
+      //check the local static cache to see if the file we want is available 
in distilled format.
+      static $ready_files = null;
+
+      if (is_null($ready_files)) {
+       $ready_files = $this->read_working_logs_dir();
+      }
+
+      //simple case: It's already ready, or none are ready
+      if (!is_null($ready_files) && array_key_exists($date, $ready_files)) {
+       return $ready_files[$date];
+      }
+
+      //This date is not ready yet. Get the zipped version from the archive, 
unzip
+      //to the working directory, and distill.
+      $compressed_filename = $this->get_compressed_log_file_name( $date );
+      $full_archive_path = wmf_audit_get_log_archive_dir() . 
$compressed_filename;
+      $working_directory = $this->get_working_log_dir();
+      $cleanup = array(); //add files we want to make sure aren't there 
anymore when we're done here.
+      if (file_exists($full_archive_path)) {
+       wmf_audit_echo("Retrieving $full_archive_path");
+       $cmd = "cp $full_archive_path " . $working_directory;
+       exec(escapeshellcmd($cmd), $ret, $errorlevel);
+       if (!file_exists($working_directory . $compressed_filename)) {
+         wmf_audit_log_error("FILE PROBLEM: Trying to get log archives, and 
something went wrong with $cmd", 'FILE_MOVE');
+         return false;
+       } else {
+         $cleanup[] = $working_directory . $compressed_filename;
+       }
+       //uncompress
+       wmf_audit_echo("Gunzipping $working_directory$compressed_filename");
+       $cmd = "gunzip -f $working_directory$compressed_filename";
+       exec(escapeshellcmd($cmd), $ret, $errorlevel);
+       //now check to make sure the file you expect, actually exists
+       $uncompressed_file = $this->get_uncompressed_log_file_name( $date );
+       if (!file_exists($working_directory . $uncompressed_file)) {
+         wmf_audit_log_error("FILE PROBLEM: Something went wrong with 
uncompressing logs: $cmd : $working_directory.$uncompressed_file doesn't 
exist.", 'FILE_UNCOMPRESS');
+       } else {
+         $cleanup[] = $working_directory . $uncompressed_file;
+       }
+
+       //distill & cache locally
+       $distilled_file = $this->get_working_log_file_name( $date );
+       //Can't escape the hard-coded string we're grepping for, because it 
breaks terribly.
+       $cmd = "grep '" . $this->get_log_distilling_grep_string() . "' " . 
escapeshellcmd($working_directory . $uncompressed_file) . " > " . 
escapeshellcmd($working_directory . $distilled_file);
+
+       wmf_audit_echo($cmd);
+       $ret = array();
+       exec($cmd, $ret, $errorlevel);
+       chmod($working_directory . $distilled_file, 0770);
+       $ready_files[$date] = $working_directory . $distilled_file;
+
+       //clean up
+       if (!empty($cleanup)) {
+         foreach ($cleanup as $deleteme) {
+           if (file_exists($deleteme)) {
+             unlink($deleteme);
+           }
+         }
+       }
+
+       //return
+       return $working_directory . $distilled_file;
+      }
+
+      //this happens if the archive file doesn't exist. Definitely not the end 
of the world, but we should probably log about it.
+      wmf_audit_log_error("Archive file $full_archive_path seems not to 
exist\n", 'MISSING_PAYMENTS_LOG');
+      return false;
+    }
+
+    /**
+     * Construct an array of all the distilled working logs we have in the 
working
+     * directory.
+     * @return array Array of date => full path to file for all distilled 
working
+     * logs
+     */
+    function read_working_logs_dir() {
+      $working_logs = array();
+      $working_dir = $this->get_working_log_dir();
+      //do the directory read and cache the results in the static
+      if (!$handle = opendir($working_dir)) {
+       die(__FUNCTION__ . ": Can't open directory. We should have noticed 
earlier (in setup_required_directories) that this wasn't going to work. \n");
+      }
+      while (( $file = readdir($handle) ) !== false) {
+       $temp_date = false;
+       if ($this->get_filetype($file) === 'working_log') {
+         $full_path = $working_dir . $file;
+         $temp_date = $this->get_working_log_file_date( $file );
+       }
+       if (!$temp_date) {
+         continue;
+       }
+       $working_logs[$temp_date] = $full_path;
+      }
+      return $working_logs;
+    }
+
+    /**
+     * Figures out what type of file you've got there, according to what the 
gateway
+     * module has defined in its _regex_for_ functions.
+     * @param string $file Full path to the file of interest
+     * @return mixed 
'recon'|'working_log'|'uncompressed_log'|'compressed_log'|false
+     * if it's nothing the gateway recognizes.
+     */
+    function get_filetype($file) {
+      //we have three types of files, right? compressed, uncompressed, 
distilled, and recon file.
+      //...four. Four types.
+
+      $types = array(
+       'recon',
+       'working_log',
+       'uncompressed_log',
+       'compressed_log',
+      );
+
+      foreach ($types as $type) {
+       $function_name = 'regex_for_' . $type;
+       // TODO: map rather than functions
+       if (preg_match($this->$function_name(), $file)) {
+         return $type;
+       }
+      }
+
+      return false;
+    }
+
+    /**
+     * Moves recon files to the completed directory. This should probably only 
be
+     * done at the beginning of a run: If we're running in queue flood mode, we
+     * don't know if the data will actually make it all the way in.
+     * @param string $file Full path to the file we want to move off
+     * @return boolean true on success, otherwise false
+     */
+    function move_completed_recon_file($file) {
+      $files_directory = $this->get_recon_completed_dir();
+      $completed_dir = $files_directory;
+      if (!is_dir($completed_dir)) {
+       if (!mkdir($completed_dir, 0770)) {
+         $message = "Could not make $completed_dir";
+         wmf_audit_log_error($message, 'FILE_PERMS');
+         return false;
+       }
+      }
+
+      $filename = basename($file);
+      $newfile = $completed_dir . '/' . $filename;
+
+      if (!rename($file, $newfile)) {
+       $message = "Unable to move $file to $newfile";
+
+       wmf_audit_log_error($message, 'FILE_PERMS');
+       return false;
+      } else {
+       chmod($newfile, 0770);
+      }
+      wmf_audit_echo("Moved $file to $newfile");
+      return true;
+    }
+
+    /**
+     * Make sure all the directories we need are there.
+     * @return boolean true on success, otherwise false
+     */
+    function setup_required_directories() {
+      $directories = array(
+       'log_archive' => wmf_audit_get_log_archive_dir(),
+       'recon' => $this->get_recon_dir(),
+       'log_working' => $this->get_working_log_dir(),
+       'recon_completed' => $this->get_recon_completed_dir(),
+      );
+
+      foreach ($directories as $id => $dir) {
+       if (!is_dir($dir)) {
+         if ($id === 'log_archive' || $id === 'recon') {
+           //already done. We don't want to try to create these, because 
required files come from here.
+           wmf_audit_log_error("Missing required directory $dir", 
'MISSING_DIR_' . strtoupper($id));
+           return false;
+         }
+
+         if (!mkdir($dir, 0770)) {
+           wmf_audit_log_error("Missing and could not create required 
directory $dir", 'MISSING_DIR_' . strtoupper($id));
+           return false;
+         }
+       }
+      }
+      return true;
+    }
+
+    /**
+     * Check the database to see if we have already recorded the transactions
+     * present in the recon files.
+     * @param array $transactions An array of transactions we have already 
parsed
+     * out from the recon files.
+     * @return mixed An array of transactions that are not in the database 
already,
+     * or false if something goes wrong enough
+     */
+    function get_missing_transactions($transactions) {
+      if (empty($transactions)) {
+       wmf_audit_echo(__FUNCTION__ . ': No transactions to find. Returning.');
+       return false;
+      }
+      //go through the transactions and check to see if they're in civi
+      //@TODO: RECURRING. Won't matter for WP initially, though, so I'm 
leaving that for the WX integration phase.
+      $missing = array(
+       'main' => array(),
+       'negative' => array(),
+       'recurring' => array(),
+      );
+      foreach ($transactions as $transaction) {
+       if ($this->record_is_refund( $transaction ) || 
$this->record_is_chargeback( $transaction )) { //negative
+         if ($this->negative_transaction_exists_in_civi( $transaction ) === 
false) {
+           wmf_audit_echo('-'); //add a subtraction. I am the helpfulest 
comment ever.
+           $missing['negative'][] = $transaction;
+         } else {
+           wmf_audit_echo('.');
+         }
+       } else { //normal type
+         if ($this->main_transaction_exists_in_civi( $transaction ) === false) 
{
+           wmf_audit_echo('!');
+           $missing['main'][] = $transaction;
+         } else {
+           wmf_audit_echo('.');
+         }
+       }
+      }
+      return $missing;
+    }
+
+    /**
+     * Visualization helper. Returns the character we want to display for the 
kind
+     * of transaction we have just parsed out of a recon file.
+     * @param array $record A single transaction from a recon file
+     * @return char A single char to display in the char block.
+     */
+    function audit_echochar($record) {
+
+      if ($this->record_is_refund( $record )) {
+       return 'r';
+      }
+
+      if ($this->record_is_chargeback( $record )) {
+       return 'b';
+      }
+
+      if ($record['payment_method'] === 'cc') {
+       return 'c';
+      }
+
+      echo print_r($record, true);
+      die(__FUNCTION__ . " Not cc...");
+    }
+
+    /**
+     * Returns supplemental data for the relevant order id from the specified
+     * payments log, if there is any in there. This is the only sure place we 
can
+     * catch data we didn't save, as we're basically grepping for the exact 
data we
+     * sent to the gateway.
+     * It should be noted that this function makes no attempt to normalize the 
data.
+     * If this log doesn't contain data for the order_id in question, return 
false.
+     * @param string $order_id The order id (transaction id) of the missing 
payment
+     * @param string $log The full path to the log we want to search
+     * @return array|boolean The data we sent to the gateway for that order 
id, or
+     * false if we can't find it there.
+     */
+    function get_xml_log_data_by_order_id($order_id, $log) {
+      if (!$order_id) {
+       return false;
+      }
+
+      $cmd = 'grep ' . $this->get_log_line_grep_string( $order_id ) . ' ' . 
$log;
+      wmf_audit_echo(__FUNCTION__ . ' ' . $cmd, true);
+
+      $ret = array();
+      exec(escapeshellcmd($cmd), $ret, $errorlevel);
+
+      if (count($ret) > 0) {
+
+       //In this wonderful new world, we only expect one line.
+       if (count($ret) > 1) {
+         wmf_audit_echo("Odd: More than one logline returned for $order_id. 
Investigation Required.");
+       }
+
+       //just take the last one, just in case somebody did manage to do a 
duplicate.
+       $line = $ret[count($ret) - 1];
+       // $linedata for *everything* from payments goes Month, day, time, box, 
bucket, CTID:OID, absolute madness with lots of unpredictable spaces.
+       $linedata = explode(' ', $line);
+       $contribution_id = explode(':', $linedata[5]);
+       $contribution_id = $contribution_id[0];
+
+
+       //look for the raw xml
+       $full_xml = false;
+       $node = $this->get_log_line_xml_outermost_node();
+       $xmlstart = strpos($line, '<?xml');
+       if ($xmlstart === false) {
+         $xmlstart = strpos($line, "<$node>");
+       }
+       $xmlend = strpos($line, "</$node>");
+       if ($xmlend) {
+         $full_xml = true;
+         $xmlend += (strlen($node) + 3);
+         $xml = substr($line, $xmlstart, $xmlend - $xmlstart);
+       } else {
+         //this is a broken line, and it won't load... but we can still parse 
what's left of the thing, the slow way.
+         $xml = substr($line, $xmlstart);
+       }
+       // Syslog wart.  Other control characters should be encoded normally.
+       $xml = str_replace( '#012', "\n", $xml );
+
+       $donor_data = array();
+
+       if ($full_xml) {
+         $xmlobj = new DomDocument;
+         $xmlobj->loadXML($xml);
+
+         $parent_nodes = $this->get_log_line_xml_parent_nodes();
+
+         if (empty($parent_nodes)) {
+           wmf_audit_log_error(__FUNCTION__ . ': No parent nodes defined. Can 
not continue.', 'RUNTIME_ERROR');
+           die("Stop dying here");
+         }
+
+         foreach ($parent_nodes as $parent_node) {
+           foreach ($xmlobj->getElementsByTagName($parent_node) as $node) {
+             foreach ($node->childNodes as $childnode) {
+               if (trim($childnode->nodeValue) != '') {
+                 $donor_data[$childnode->nodeName] = $childnode->nodeValue;
+               }
+             }
+           }
+         }
+       } else {
+         //the XML got cut off prematurely, probably because syslog was set up 
to truncate on payments.
+         //rebuild what we can the old-fashioned way.
+         $search_for_nodes = $this->get_log_line_xml_data_nodes();
+
+         if (empty($search_for_nodes)) {
+           wmf_audit_log_error(__FUNCTION__ . ': No searchable nodes defined. 
Can not continue.', 'RUNTIME_ERROR');
+           die("Stop dying here");
+         }
+         foreach ($search_for_nodes as $node) {
+           $tmp = wmf_audit_get_partial_xml_node_value($node, $xml);
+           if (!is_null($tmp)) {
+             $donor_data[$node] = $tmp;
+           }
+         }
+       }
+
+       if (!empty($donor_data)) {
+         $donor_data['contribution_tracking_id'] = $contribution_id;
+         return $donor_data;
+       } else {
+         wmf_audit_log_error("We found a transaction in the logs for 
$order_id, but there's nothing left after we tried to grab its data. 
Investigation required.", 'DATA_WEIRD');
+       }
+      }
+      return false; //no big deal, it just wasn't there. This will happen most 
of the time.
+    }
+}
diff --git a/sites/all/modules/wmf_audit/wmf_audit.info 
b/sites/all/modules/wmf_audit/wmf_audit.info
index d82630a..3c1b3d5 100644
--- a/sites/all/modules/wmf_audit/wmf_audit.info
+++ b/sites/all/modules/wmf_audit/wmf_audit.info
@@ -7,3 +7,4 @@
 dependencies[] = wmf_civicrm
 package = WMF Audit
 configure = admin/config/wmf_audit
+files[] = BaseAuditProcessor.php
diff --git a/sites/all/modules/wmf_audit/wmf_audit.module 
b/sites/all/modules/wmf_audit/wmf_audit.module
index adff527..f40704c 100644
--- a/sites/all/modules/wmf_audit/wmf_audit.module
+++ b/sites/all/modules/wmf_audit/wmf_audit.module
@@ -65,194 +65,6 @@
 }
 
 /**
- * The main function, intended to be called straight from the drush command and
- * nowhere else.
- * @param string $submod_prefix The prefix for the gateway-specific submodule
- * for which we are running the nightly audit. For any of this to work, there
- * will have to be a submodule called $submod_prefix . '_audit'.
- * @param array $drush_params An array of the drush parameters that were set at
- * the command line.
- * @return boolean
- */
-function wmf_audit_main($submod_prefix, $drush_params) {
-  civicrm_initialize();
-
-  //this is more of a runtime param... maybe set it on the outside? But no, I
-  //kind of like that this one thing is explicitly required in an obvious way 
to
-  //even call the function.
-  $drush_params['submod_prefix'] = $submod_prefix;
-
-  //This function is weird. Because we're passing in an array, it's setting the
-  //runtime params here.
-  //Everywhere else, it will work as a get function.
-  wmf_audit_runtime_options($drush_params);
-
-
-  //make sure all the things we need are there.
-  if (!wmf_audit_setup_required_directories()) {
-    return false;
-  }
-
-  //find out what the situation is with the available recon files, by date
-  $recon_files = wmf_audit_get_all_recon_files();
-
-  //get missing transactions from one or more recon files
-  //let's just assume that the default mode will be to pop off the top three 
(most recent) at this point. :)
-  //...Three, because Shut Up.
-  $count = count($recon_files);
-  if ($count > 3 && !wmf_audit_runtime_options('run_all')) {
-    $count = 3;
-  }
-
-  $total_missing = array();
-  $recon_file_stats = array();
-
-  for ($i = 0; $i < $count; ++$i) {
-    $parsed = array();
-    $missing = array();
-
-    //parce the recon files into something relatively reasonable.
-    $file = array_pop($recon_files);
-    wmf_audit_echo("Parsing $file");
-    $start_time = microtime(true);
-    $parsed = wmfa_execute('parse_recon_file', array('file' => $file));
-    $time = microtime(true) - $start_time;
-    wmf_audit_echo(count($parsed) . " results found in $time seconds\n");
-
-    //remove transactions we already know about
-    $start_time = microtime(true);
-    $missing = wmf_audit_get_missing_transactions($parsed);
-    $recon_file_stats[$file] = wmf_audit_count_missing($missing);
-    $time = microtime(true) - $start_time;
-    wmf_audit_echo(wmf_audit_count_missing($missing) . ' missing transactions 
(of a possible ' . count($parsed) . ") identified in $time seconds\n");
-
-    //If the file is empty, move it off.
-    // Note that we are not archiving files that have missing transactions,
-    // which might be resolved below.  Those are archived on the next run,
-    // once we can confirm they have hit Civi and are no longer missing.
-    if (wmf_audit_count_missing($missing) <= 
wmf_audit_runtime_options('recon_complete_count')) {
-      wmf_audit_move_completed_recon_file($file);
-    }
-
-    //grumble...
-    if (!empty($missing)) {
-      foreach ($missing as $type => $data) {
-        if (!empty($missing[$type])) {
-          if (array_key_exists($type, $total_missing)) {
-            $total_missing[$type] = array_merge($total_missing[$type], 
$missing[$type]);
-          } else {
-            $total_missing[$type] = $missing[$type];
-          }
-        }
-      }
-    }
-  }
-  $total_missing_count = wmf_audit_count_missing($total_missing);
-  wmf_audit_echo("$total_missing_count total missing transactions identified 
at start");
-
-  //get the date distribution on what's left... for ***main transactions 
only***
-  //That should be to say: The things that are totally in the payments logs.
-  //Other things, we will have to look other places, or just rebuild.
-  $missing_by_date = array();
-  if (array_key_exists('main', $total_missing) && 
!empty($total_missing['main'])) {
-    foreach ($total_missing['main'] as $record) {
-      $missing_by_date[wmfa_execute('get_record_human_date', array('record' => 
$record))][] = $record;
-    }
-  }
-
-
-  $remaining = null;
-  if (!empty($missing_by_date)) {
-    $remaining['main'] = wmf_audit_log_hunt_and_send($missing_by_date);
-  }
-
-  //@TODO: Handle the recurring type, once we have a gateway that gives some 
of those to us.
-  //
-  //Handle the negatives now. That way, the parent transactions will probably 
exist.
-  wmf_audit_echo("Processing 'negative' transactions");
-  $neg_count = 0;
-
-  if (array_key_exists('negative', $total_missing) && 
!empty($total_missing['negative'])) {
-    foreach ($total_missing['negative'] as $record) {
-      //check to see if the parent exists. If it does, normalize and send.
-      if (worldpay_audit_main_transaction_exists_in_civi($record)) {
-        $normal = wmfa_execute('normalize_negative', array('record' => 
$record));
-        if (wmf_audit_send_transaction($normal, 'negative')) {
-          $neg_count += 1;
-          wmf_audit_echo('!');
-        }
-        wmf_audit_echo('X');
-      } else {
-        //@TODO: Some version of makemissing should make these, too. Gar.
-        $remaining['negative'][wmfa_execute('get_record_human_date', 
array('record' => $record))][] = $record;
-        wmf_audit_echo('.');
-      }
-    }
-    wmf_audit_echo("Processed $neg_count 'negative' transactions\n");
-  }
-
-  //Wrap it up and put a bow on it.
-  //@TODO much later: Make a fredge table for these things and dump some 
messages over there about what we just did.
-  $missing_main = 0;
-  $missing_negative = 0;
-  $missing_recurring = 0;
-  $missing_at_end = 0;
-  if (is_array($remaining) && !empty($remaining)) {
-    foreach ($remaining as $type => $data) {
-      $count = wmf_audit_count_missing($data);
-      ${'missing_' . $type} = $count;
-      $missing_at_end += $count;
-    }
-  }
-
-  $wrap_up = "\nDone! Final stats:\n";
-  $wrap_up .= "Total missing at start: $total_missing_count\n";
-  $wrap_up .= 'Missing at end: ' . $missing_at_end . "\n\n";
-
-  if ($missing_at_end > 0) {
-    $wrap_up .= "Missing transaction summary:\n";
-    $wrap_up .= "Regular donations: $missing_main\n";
-    if ($missing_main > 0) {
-      foreach ($remaining['main'] as $date => $missing) {
-        if (count($missing) > 0) {
-          $wrap_up .= "\t$date: " . count($missing) . "\n";
-        }
-      }
-    }
-    $wrap_up .= "Refunds and chargebacks: $missing_negative\n";
-    if ($missing_negative > 0) {
-      foreach ($remaining['negative'] as $date => $missing) {
-        if (count($missing) > 0) {
-          $wrap_up .= "\t$date: " . count($missing) . "\n";
-        }
-      }
-    }
-    $wrap_up .= "Recurring donations: $missing_recurring\n";
-    if ($missing_recurring > 0) {
-      foreach ($remaining['recurring'] as $date => $missing) {
-        if (count($missing) > 0) {
-          $wrap_up .= "\t$date: " . count($missing) . "\n";
-        }
-      }
-    }
-
-    $wrap_up .= "Transaction IDs:\n";
-    foreach ($remaining as $group => $transactions) {
-      foreach ($transactions as $date => $missing) {
-        foreach ($missing as $transaction) {
-          $wrap_up .= "\t" . 
WmfTransaction::from_message($transaction)->get_unique_id() . "\n";
-        }
-      }
-    }
-
-    $wrap_up .= 'Initial stats on recon files: ' . print_r($recon_file_stats, 
true) . "\n";
-  }
-
-  wmf_audit_echo($wrap_up);
-  return true;
-}
-
-/**
  * Counts the missing transactions in the main array of missing transactions.
  * This is annoying and needed its own function, because the $missing array 
goes
  * $missing[$type]=> array of transactions.
@@ -273,366 +85,13 @@
 }
 
 /**
- * Returns an array of the full paths to all valid reconciliation files
- * @return array Full paths to all recon files
- */
-function wmf_audit_get_all_recon_files() {
-  $files_directory = wmfa_execute('get_recon_dir');
-  //foreach file in the directory, if it matches our pattern, add it to the 
array.
-  $files = array();
-  if ($handle = opendir($files_directory)) {
-    while (( $file = readdir($handle) ) !== false) {
-      if (wmf_audit_get_filetype($file) === 'recon') {
-        $filedate = wmfa_execute('get_recon_file_date', array($file)); //which 
is not the same thing as the edited date... probably.
-        $files[$filedate] = $files_directory . $file;
-      }
-    }
-    closedir($handle);
-    ksort($files);
-    return $files;
-  } else {
-    //can't open the directory at all. Problem.
-    wmf_audit_log_error("Can't open directory $files_directory", 
'FILE_DIR_MISSING'); //should be fatal
-  }
-  return false;
-}
-
-/**
- * Go transaction hunting in the payments logs. This is by far the most
- * processor-intensive part, but we have some timesaving new near-givens to 
work
- * with.
- * Things to remember:
- * The date on the payments log, probably doesn't contain much of that actual
- * date. It's going to be the previous day, mostly.
- * Also, remember logrotate exists, so it might be the next day before we get
- * the payments log we would be most interested in today.
- *
- * @param array $missing_by_date An array of all the missing transactions we
- * have pulled out of the nightlies, indexed by the standard WMF date format.
- * @return mixed An array of transactions we couldn't find or deal with (by
- * date), or false on error
- */
-function wmf_audit_log_hunt_and_send($missing_by_date) {
-
-  if (empty($missing_by_date)) {
-    wmf_audit_echo(__FUNCTION__ . ': No missing transactions sent to this 
function. Aborting.');
-    return false;
-  }
-
-  ksort($missing_by_date);
-
-  //output the initial counts for each index...
-  $earliest = null;
-  $latest = null;
-  foreach ($missing_by_date as $date => $data) {
-    if (is_null($earliest)) {
-      $earliest = $date;
-    }
-    $latest = $date;
-    wmf_audit_echo($date . " : " . count($data));
-  }
-  wmf_audit_echo("\n");
-
-
-  //REMEMBER: Log date is a liar!
-  //Stepping backwards, log date really means "Now you have all the data for
-  //this date, and some from the previous."
-  //Stepping forwards, it means "Now you have all the data from the previous
-  //date, and some from the next."
-  //
-  //Come up with the full range of logs to grab
-  //go back the number of days we have configured to search in the past for the
-  //current gateway
-  $earliest = wmf_common_date_add_days($earliest, -1 * 
wmfa_execute('get_log_days_in_past'));
-
-  //and add one to the latest to compensate for logrotate... unless that's the 
future.
-  $today = wmf_common_date_get_today_string();
-  $latest = wmf_common_date_add_days($latest, 1);
-  if ($today < $latest) {
-    $latest = $today;
-  }
-
-  //Correct for the date gap function being exclusive on the starting date 
param
-  //More explain above.
-  $earliest -= 1;
-
-  //get the array of all the logs we want to check
-  $logs_to_grab = wmf_common_date_get_date_gap($earliest, $latest);
-
-  if (empty($logs_to_grab)) {
-    wmf_audit_log_error(__FUNCTION__ . ': No logs identified as grabbable. 
Aborting.', 'RUNTIME_ERROR');
-    return false;
-  }
-
-  //want the latest first, from now on.
-  rsort($logs_to_grab);
-  krsort($missing_by_date);
-
-  //Foreach log by date DESC, check all the transactions we are missing that 
might possibly be in this log.
-  //This is going to look a little funny, because the logs are date-named and 
stamped after the day they rotate; Not after the dates for all the data in them.
-  //As such, they mostly contain data for the previous day (but not 
exclusively, and not all of it)
-  $tryme = array();
-  foreach ($logs_to_grab as $log_date) {
-    //Add to the pool of what's possible to find in this log, as we step 
backward through the log dates.
-    //If the log date is less than or equal to the date on the transaction
-    //(which may or may not be when it was initiated, but that's the past-iest
-    //option), and it hasn't already been added to the pool, add it to the 
pool.
-    //As we're stepping backward, we should look for transactions that come
-    //from the current log date, or the one before.
-    foreach ($missing_by_date as $date => $data) {
-      if ($date >= ($log_date - 1)) {
-        if (!array_key_exists($date, $tryme)) {
-          wmf_audit_echo("Adding date $date to the date pool for log date 
$log_date");
-          $tryme[$date] = $data;
-        }
-      } else {
-        break;
-      }
-    }
-
-    //log something sensible out for what we're about to do
-    $display_dates = array();
-    if (!empty($tryme)) {
-      foreach ($tryme as $date => $thing) {
-        if (count($thing) > 0) {
-          $display_dates[$date] = count($thing);
-        }
-      }
-    }
-    $log = false;
-    if (!empty($display_dates)) {
-      $message = "Checking log $log_date for missing transactions that came in 
with the following dates: ";
-      foreach ($display_dates as $display_date => $local_count) {
-        $message .= "\n\t$display_date : $local_count";
-      }
-      wmf_audit_echo($message);
-
-      //now actually check the log from $log_date, for the missing 
transactions in $tryme
-      // Get the prepped log with the current date, returning false if it's 
not there.
-      $log = wmf_audit_get_log_by_date($log_date);
-    }
-
-    if ($log) {
-      //check to see if the missing transactions we're trying now, are in 
there.
-      //Echochar with results for each one.
-      foreach ($tryme as $date => $missing) {
-        if (!empty($missing)) {
-          wmf_audit_echo("Log Date: $log_date: About to check " . 
count($missing) . " missing transactions from $date", true);
-          $checked = 0;
-          $found = 0;
-          foreach ($missing as $id => $transaction) {
-            $checked += 1;
-            //reset vars used below, for extra safety
-            $order_id = false;
-            $data = false;
-            $all_data = false;
-            $contribution_tracking_data = false;
-            $error = false;
-
-            $order_id = wmfa_execute('get_order_id', array('transaction' => 
$transaction));
-            if (!$order_id) {
-              $error = array(
-                'message' => 'Could not get an order id for the following 
transaction ' . print_r($transaction, true),
-                'code' => 'MISSING_MANDATORY_DATA',
-              );
-            } else {
-              //@TODO: If you ever have a gateway that doesn't communicate 
with xml, this is going to have to be abstracted slightly.
-              //Not going to worry about that right now, though.
-              $data = wmf_audit_get_xml_log_data_by_order_id($order_id, $log);
-            }
-
-            //if we have data at this point, it means we have a match in the 
logs
-            if ($data) {
-              $found += 1;
-              $all_data = wmfa_execute('normalize_and_merge_data', 
array('xml_data' => $data, 'recon_data' => $transaction));
-              if (!$all_data) {
-                $error = array(
-                  'message' => 'Error normalizing data. Skipping the 
following: ' . print_r($transaction, true) . "\n" . print_r($data, true),
-                  'code' => 'NORMALIZE_DATA',
-                );
-              }
-              if (!$error) {
-                //lookup contribution_tracking data, and fill it in with audit 
markers if there's nothing there.
-                $contribution_tracking_data = 
wmf_audit_get_contribution_tracking_data($all_data);
-              }
-
-              if (!$contribution_tracking_data) {
-                $error = array(
-                  'message' => 'No contribution trackin data retrieved for 
transaction ' . print_r($all_data, true),
-                  'code' => 'MISSING_MANDATORY_DATA',
-                );
-              }
-
-              if (!$error) {
-                //Now that we've made it this far: Easy check to make sure 
we're even looking at the right thing...
-                //I'm not totally sure this is going to be the right thing to 
do, though. Intended fragility.
-                if ((!wmf_audit_runtime_options('fakedb')) &&
-                  (!empty($contribution_tracking_data['utm_payment_method'])) 
&&
-                  ($contribution_tracking_data['utm_payment_method'] !== 
$all_data['payment_method'])) {
-
-                  $message = 'Payment method mismatch between utm tracking 
data(' . $contribution_tracking_data['utm_payment_method'];
-                  $message .= ') and normalized log and recon data(' . 
$all_data['payment_method'] . '). Investigation required.';
-                  $error = array(
-                    'message' => $message,
-                    'code' => 'UTM_DATA_MISMATCH',
-                  );
-                } else {
-                  unset($contribution_tracking_data['utm_payment_method']);
-                  // On the next line, the date field from all_data will win, 
which we totally want.
-                  // I had thought we'd prefer the contribution tracking date, 
but that's just silly.
-                  // However, I'd just like to point out that it would be 
terribly enlightening for some gateways to log the difference...
-                  // ...but not inside the char block, because it'll break the 
pretty.
-                  $all_data = array_merge($contribution_tracking_data, 
$all_data);
-                }
-
-                if (!$error) {
-                  //Send to stomp. Or somewhere. Or don't (if it's test mode).
-                  wmf_audit_send_transaction($all_data, 'main');
-                  unset($tryme[$date][$id]);
-                  wmf_audit_echo('!');
-                }
-              }
-
-            } else {
-              //no data found in this log, which is expected and normal and 
not a problem.
-              wmf_audit_echo('.');
-            }
-
-            //End of the transaction search/destroy loop. If we're here and 
have
-            //an error, we found something and the re-fusion didn't work.
-            //Handle consistently, and definitely don't try looking in other
-            //logs.
-            if (is_array($error)) {
-              wmf_audit_log_error($error['message'], $error['code']);
-              unset($tryme[$date][$id]);
-              wmf_audit_echo('X');
-            }
-          }
-          wmf_audit_echo("Log Date: $log_date: Checked $checked missing 
transactions from $date, and found $found\n");
-        }
-      }
-    }
-  }
-
-  //That loop has been stepping back in to the past. So, use what we have...
-  wmf_audit_remove_old_logs($log_date);
-
-  //if we are running in makemissing mode: make the missing transactions.
-  if (wmf_audit_runtime_options('makemissing')) {
-    $missing_count = wmf_audit_count_missing($tryme);
-    if ($missing_count === 0) {
-      wmf_audit_echo('No further missing transactions to make.');
-    } else {
-      //today minus three. Again: The three is because Shut Up.
-      wmf_audit_echo("Making up to $missing_count missing transactions:");
-      $made = 0;
-      $cutoff = wmf_common_date_add_days(wmf_common_date_get_today_string(), 
-3);
-      foreach ($tryme as $date => $missing) {
-        if ((int) $date <= (int) $cutoff) {
-          foreach ($missing as $id => $message) {
-            $contribution_tracking_data = 
wmf_audit_make_contribution_tracking_data($message);
-            $all_data = array_merge($message, $contribution_tracking_data);
-            $sendme = wmfa_execute('normalize_partial', array('record' => 
$all_data));
-            wmf_audit_send_transaction($sendme, 'main');
-            $made += 1;
-            wmf_audit_echo('!');
-            unset($tryme[$date][$id]);
-          }
-        }
-      }
-      wmf_audit_echo("Made $made missing transactions\n");
-    }
-  }
-
-  return $tryme; //this will contain whatever's left, if we haven't errored 
out at this point
-}
-
-/**
- * Both groom and return a distilled working payments log ready to be searched
- * for missing transaction data
- * @param string $date The date of the log we want to grab
- */
-function wmf_audit_get_log_by_date($date) {
-  //Could be distilled already.
-  //Could be either in .gz format in the archive
-  //check for the distilled version first
-  //check the local static cache to see if the file we want is available in 
distilled format.
-  static $ready_files = null;
-
-  if (is_null($ready_files)) {
-    $ready_files = wmf_audit_read_working_logs_dir();
-  }
-
-  //simple case: It's already ready, or none are ready
-  if (!is_null($ready_files) && array_key_exists($date, $ready_files)) {
-    return $ready_files[$date];
-  }
-
-  //This date is not ready yet. Get the zipped version from the archive, unzip
-  //to the working directory, and distill.
-  $compressed_filename = wmfa_execute('get_compressed_log_file_name', 
array('date' => $date));
-  $full_archive_path = wmf_audit_get_log_archive_dir() . $compressed_filename;
-  $working_directory = wmfa_execute('get_working_log_dir');
-  $cleanup = array(); //add files we want to make sure aren't there anymore 
when we're done here.
-  if (file_exists($full_archive_path)) {
-    wmf_audit_echo("Retrieving $full_archive_path");
-    $cmd = "cp $full_archive_path " . $working_directory;
-    exec(escapeshellcmd($cmd), $ret, $errorlevel);
-    if (!file_exists($working_directory . $compressed_filename)) {
-      wmf_audit_log_error("FILE PROBLEM: Trying to get log archives, and 
something went wrong with $cmd", 'FILE_MOVE');
-      return false;
-    } else {
-      $cleanup[] = $working_directory . $compressed_filename;
-    }
-    //uncompress
-    wmf_audit_echo("Gunzipping $working_directory$compressed_filename");
-    $cmd = "gunzip -f $working_directory$compressed_filename";
-    exec(escapeshellcmd($cmd), $ret, $errorlevel);
-    //now check to make sure the file you expect, actually exists
-    $uncompressed_file = wmfa_execute('get_uncompressed_log_file_name', 
array('date' => $date));
-    if (!file_exists($working_directory . $uncompressed_file)) {
-      wmf_audit_log_error("FILE PROBLEM: Something went wrong with 
uncompressing logs: $cmd : $working_directory.$uncompressed_file doesn't 
exist.", 'FILE_UNCOMPRESS');
-    } else {
-      $cleanup[] = $working_directory . $uncompressed_file;
-    }
-
-    //distill & cache locally
-    $distilled_file = wmfa_execute('get_working_log_file_name', array('date' 
=> $date));
-    //Can't escape the hard-coded string we're grepping for, because it breaks 
terribly.
-    $cmd = "grep '" . wmfa_execute('get_log_distilling_grep_string') . "' " . 
escapeshellcmd($working_directory . $uncompressed_file) . " > " . 
escapeshellcmd($working_directory . $distilled_file);
-
-    wmf_audit_echo($cmd);
-    $ret = array();
-    exec($cmd, $ret, $errorlevel);
-    chmod($working_directory . $distilled_file, 0770);
-    $ready_files[$date] = $working_directory . $distilled_file;
-
-    //clean up
-    if (!empty($cleanup)) {
-      foreach ($cleanup as $deleteme) {
-        if (file_exists($deleteme)) {
-          unlink($deleteme);
-        }
-      }
-    }
-
-    //return
-    return $working_directory . $distilled_file;
-  }
-
-  //this happens if the archive file doesn't exist. Definitely not the end of 
the world, but we should probably log about it.
-  wmf_audit_log_error("Archive file $full_archive_path seems not to exist\n", 
'MISSING_PAYMENTS_LOG');
-  return false;
-}
-
-/**
  * Remove all distilled logs older than the oldest date ($date)
  * Not even a big deal if we overshoot and remove too many, because we'll just
  * remake them next time if they're missing.
  * @param string $date The date string for the oldest log we want to keep
+ * @param string $working_logs path to working logs directory
  */
-function wmf_audit_remove_old_logs($date) {
-  $working_logs = wmf_audit_read_working_logs_dir();
+function wmf_audit_remove_old_logs($date, $working_logs) {
   if (!empty($working_logs)) {
     foreach ($working_logs as $logdate => $file) {
       if ((int) $logdate < (int) $date) {
@@ -643,70 +102,14 @@
 }
 
 /**
- * Construct an array of all the distilled working logs we have in the working
- * directory.
- * @return array Array of date => full path to file for all distilled working
- * logs
- */
-function wmf_audit_read_working_logs_dir() {
-  $working_logs = array();
-  $working_dir = wmfa_execute('get_working_log_dir');
-  //do the directory read and cache the results in the static
-  if (!$handle = opendir($working_dir)) {
-    die(__FUNCTION__ . ": Can't open directory. We should have noticed earlier 
(in setup_required_directories) that this wasn't going to work. \n");
-  }
-  while (( $file = readdir($handle) ) !== false) {
-    $temp_date = false;
-    if (wmf_audit_get_filetype($file) === 'working_log') {
-      $full_path = $working_dir . $file;
-      $temp_date = wmfa_execute('get_working_log_file_date', array('file' => 
$file)); //($file);
-    }
-    if (!$temp_date) {
-      continue;
-    }
-    $working_logs[$temp_date] = $full_path;
-  }
-  return $working_logs;
-}
-
-/**
- * Figures out what type of file you've got there, according to what the 
gateway
- * module has defined in its _regex_for_ functions.
- * @param string $file Full path to the file of interest
- * @return mixed 
'recon'|'working_log'|'uncompressed_log'|'compressed_log'|false
- * if it's nothing the gateway recognizes.
- */
-function wmf_audit_get_filetype($file) {
-  //we have three types of files, right? compressed, uncompressed, distilled, 
and recon file.
-  //...four. Four types.
-
-  $types = array(
-    'recon',
-    'working_log',
-    'uncompressed_log',
-    'compressed_log',
-  );
-
-  foreach ($types as $type) {
-    $function_name = 'regex_for_' . $type;
-    if (preg_match(wmfa_execute($function_name), $file)) {
-      return $type;
-    }
-  }
-
-  return false;
-}
-
-/**
  * Moves recon files to the completed directory. This should probably only be
  * done at the beginning of a run: If we're running in queue flood mode, we
  * don't know if the data will actually make it all the way in.
  * @param string $file Full path to the file we want to move off
+ * @param string $completed_dir Path to the recon completed directory.
  * @return boolean true on success, otherwise false
  */
-function wmf_audit_move_completed_recon_file($file) {
-  $files_directory = wmfa_execute('get_recon_completed_dir');
-  $completed_dir = $files_directory;
+function wmf_audit_move_completed_recon_file($file, $completed_dir) {
   if (!is_dir($completed_dir)) {
     if (!mkdir($completed_dir, 0770)) {
       $message = "Could not make $completed_dir";
@@ -728,121 +131,6 @@
   }
   wmf_audit_echo("Moved $file to $newfile");
   return true;
-}
-
-/**
- * Make sure all the directories we need are there.
- * @return boolean true on success, otherwise false
- */
-function wmf_audit_setup_required_directories() {
-  $directories = array(
-    'log_archive' => wmf_audit_get_log_archive_dir(),
-    'recon' => wmfa_execute('get_recon_dir'),
-    'log_working' => wmfa_execute('get_working_log_dir'),
-    'recon_completed' => wmfa_execute('get_recon_completed_dir'),
-  );
-
-  foreach ($directories as $id => $dir) {
-    if (!is_dir($dir)) {
-      if ($id === 'log_archive' || $id === 'recon') {
-        //already done. We don't want to try to create these, because required 
files come from here.
-        wmf_audit_log_error("Missing required directory $dir", 'MISSING_DIR_' 
. strtoupper($id));
-        return false;
-      }
-
-      if (!mkdir($dir, 0770)) {
-        wmf_audit_log_error("Missing and could not create required directory 
$dir", 'MISSING_DIR_' . strtoupper($id));
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-/**
- * Executes a function that is expected to exist in the relevant submodule
- * defined at runtime. Fails angrily if that function doesn't exist.
- * @param string $function_name_base Everything after the submodule . '_audit_'
- * in the function you're trying to call.
- * @param array $parameters The parameters you want to send to the function, in
- * array format.
- * @return mixed The return value of the specified function.
- */
-function wmfa_execute($function_name_base, $parameters = array()) {
-  $submod_prefix = wmf_audit_runtime_options('submod_prefix');
-  $function_name = $submod_prefix . '_audit_' . $function_name_base;
-  if (function_exists($function_name)) {
-    return call_user_func_array($function_name, $parameters);
-  } else {
-    //serious issues, you have.
-    wmf_audit_log_error("Missing required function $function_name", 
'MISSING_FUNCTION'); //should be fatal
-    die(); //this really should die here. For real, though.
-  }
-  return false; //no way to get here with all the dying
-}
-
-/**
- * Check the database to see if we have already recorded the transactions
- * present in the recon files.
- * @param array $transactions An array of transactions we have already parsed
- * out from the recon files.
- * @return mixed An array of transactions that are not in the database already,
- * or false if something goes wrong enough
- */
-function wmf_audit_get_missing_transactions($transactions) {
-  if (empty($transactions)) {
-    wmf_audit_echo(__FUNCTION__ . ': No transactions to find. Returning.');
-    return false;
-  }
-  //go through the transactions and check to see if they're in civi
-  //@TODO: RECURRING. Won't matter for WP initially, though, so I'm leaving 
that for the WX integration phase.
-  $missing = array(
-    'main' => array(),
-    'negative' => array(),
-    'recurring' => array(),
-  );
-  foreach ($transactions as $transaction) {
-    if (wmfa_execute('record_is_refund', array('record' => $transaction)) || 
wmfa_execute('record_is_chargeback', array('record' => $transaction))) { 
//negative
-      if (wmfa_execute('negative_transaction_exists_in_civi', 
array('transaction' => $transaction)) === false) {
-        wmf_audit_echo('-'); //add a subtraction. I am the helpfulest comment 
ever.
-        $missing['negative'][] = $transaction;
-      } else {
-        wmf_audit_echo('.');
-      }
-    } else { //normal type
-      if (wmfa_execute('main_transaction_exists_in_civi', array('transaction' 
=> $transaction)) === false) {
-        wmf_audit_echo('!');
-        $missing['main'][] = $transaction;
-      } else {
-        wmf_audit_echo('.');
-      }
-    }
-  }
-  return $missing;
-}
-
-/**
- * Visualization helper. Returns the character we want to display for the kind
- * of transaction we have just parsed out of a recon file.
- * @param array $record A single transaction from a recon file
- * @return char A single char to display in the char block.
- */
-function wmf_audit_echochar($record) {
-
-  if (wmfa_execute('record_is_refund', array('record' => $record))) {
-    return 'r';
-  }
-
-  if (wmfa_execute('record_is_chargeback', array('record' => $record))) {
-    return 'b';
-  }
-
-  if ($record['payment_method'] === 'cc') {
-    return 'c';
-  }
-
-  echo print_r($record, true);
-  die(__FUNCTION__ . " Not cc...");
 }
 
 /**
@@ -892,6 +180,7 @@
  * A confusing function for holding local runtime vars without having to goof
  * around with globals. Works as both get and set, depending on what you pass 
in
  * with $confusing_thing
+ * TODO: Stop that.  Some vars should be global, most should be private to the 
audit class.
  * @staticvar array $args The arguments set at runtime via drush command
  * @param array|string|null $confusing_thing A confusing thing.
  * * When it is an array, sets the internal static $args variable to that array
@@ -936,6 +225,7 @@
 /**
  * Returns an array of errors that should not cause the script to blow up, but
  * which will probably still cause messages to get thrown out.
+ * TODO: Use WmfException instead.
  * @return boolean true if the error code is fatal, otherwise false.
  */
 function wmf_audit_error_isfatal($error) {
@@ -958,113 +248,6 @@
   } else {
     return true;
   }
-}
-
-
-/**
- * Returns supplemental data for the relevant order id from the specified
- * payments log, if there is any in there. This is the only sure place we can
- * catch data we didn't save, as we're basically grepping for the exact data we
- * sent to the gateway.
- * It should be noted that this function makes no attempt to normalize the 
data.
- * If this log doesn't contain data for the order_id in question, return false.
- * @param string $order_id The order id (transaction id) of the missing payment
- * @param string $log The full path to the log we want to search
- * @return array|boolean The data we sent to the gateway for that order id, or
- * false if we can't find it there.
- */
-function wmf_audit_get_xml_log_data_by_order_id($order_id, $log) {
-  if (!$order_id) {
-    return false;
-  }
-
-  $cmd = 'grep ' . wmfa_execute('get_log_line_grep_string', array('order_id' 
=> $order_id)) . ' ' . $log;
-  wmf_audit_echo(__FUNCTION__ . ' ' . $cmd, true);
-
-  $ret = array();
-  exec(escapeshellcmd($cmd), $ret, $errorlevel);
-
-  if (count($ret) > 0) {
-
-    //In this wonderful new world, we only expect one line.
-    if (count($ret) > 1) {
-      wmf_audit_echo("Odd: More than one logline returned for $order_id. 
Investigation Required.");
-    }
-
-    //just take the last one, just in case somebody did manage to do a 
duplicate.
-    $line = $ret[count($ret) - 1];
-    // $linedata for *everything* from payments goes Month, day, time, box, 
bucket, CTID:OID, absolute madness with lots of unpredictable spaces.
-    $linedata = explode(' ', $line);
-    $contribution_id = explode(':', $linedata[5]);
-    $contribution_id = $contribution_id[0];
-
-
-    //look for the raw xml
-    $full_xml = false;
-    $node = wmfa_execute('get_log_line_xml_outermost_node');
-    $xmlstart = strpos($line, '<?xml');
-    if ($xmlstart === false) {
-      $xmlstart = strpos($line, "<$node>");
-    }
-    $xmlend = strpos($line, "</$node>");
-    if ($xmlend) {
-      $full_xml = true;
-      $xmlend += (strlen($node) + 3);
-      $xml = substr($line, $xmlstart, $xmlend - $xmlstart);
-    } else {
-      //this is a broken line, and it won't load... but we can still parse 
what's left of the thing, the slow way.
-      $xml = substr($line, $xmlstart);
-    }
-    // Syslog wart.  Other control characters should be encoded normally.
-    $xml = str_replace( '#012', "\n", $xml );
-
-    $donor_data = array();
-
-    if ($full_xml) {
-      $xmlobj = new DomDocument;
-      $xmlobj->loadXML($xml);
-
-      $parent_nodes = wmfa_execute('get_log_line_xml_parent_nodes');
-
-      if (empty($parent_nodes)) {
-        wmf_audit_log_error(__FUNCTION__ . ': No parent nodes defined. Can not 
continue.', 'RUNTIME_ERROR');
-        die("Stop dying here");
-      }
-
-      foreach ($parent_nodes as $parent_node) {
-        foreach ($xmlobj->getElementsByTagName($parent_node) as $node) {
-          foreach ($node->childNodes as $childnode) {
-            if (trim($childnode->nodeValue) != '') {
-              $donor_data[$childnode->nodeName] = $childnode->nodeValue;
-            }
-          }
-        }
-      }
-    } else {
-      //the XML got cut off prematurely, probably because syslog was set up to 
truncate on payments.
-      //rebuild what we can the old-fashioned way.
-      $search_for_nodes = wmfa_execute('get_log_line_xml_data_nodes');
-
-      if (empty($search_for_nodes)) {
-        wmf_audit_log_error(__FUNCTION__ . ': No searchable nodes defined. Can 
not continue.', 'RUNTIME_ERROR');
-        die("Stop dying here");
-      }
-      foreach ($search_for_nodes as $node) {
-        $tmp = wmf_audit_get_partial_xml_node_value($node, $xml);
-        if (!is_null($tmp)) {
-          $donor_data[$node] = $tmp;
-        }
-      }
-    }
-
-    if (!empty($donor_data)) {
-      $donor_data['contribution_tracking_id'] = $contribution_id;
-      return $donor_data;
-    } else {
-      wmf_audit_log_error("We found a transaction in the logs for $order_id, 
but there's nothing left after we tried to grab its data. Investigation 
required.", 'DATA_WEIRD');
-    }
-  }
-  return false; //no big deal, it just wasn't there. This will happen most of 
the time.
 }
 
 /**
@@ -1097,7 +280,6 @@
   $value = substr($xml, $valstart, $valend - $valstart);
   return $value;
 }
-
 
 /**
  * Returns the contribution tracking data for $record, if we can find it.

-- 
To view, visit https://gerrit.wikimedia.org/r/220714
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I60da6b9ca7f542a32cf01ff8446ac7e68f866e6e
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/fundraising/crm
Gerrit-Branch: master
Gerrit-Owner: Awight <awi...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to