Awight has uploaded a new change for review. https://gerrit.wikimedia.org/r/220714
Change subject: WIP Minimum split of BaseAuditProcessor from functions that don't require context ...................................................................... WIP Minimum split of BaseAuditProcessor from functions that don't require context TODO: * Define interface that must be implemented by subclasses. * Port WorldPay and GlobalCollect. Change-Id: I60da6b9ca7f542a32cf01ff8446ac7e68f866e6e --- A sites/all/modules/wmf_audit/BaseAuditProcessor.php M sites/all/modules/wmf_audit/wmf_audit.info M sites/all/modules/wmf_audit/wmf_audit.module 3 files changed, 837 insertions(+), 824 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/fundraising/crm refs/changes/14/220714/1 diff --git a/sites/all/modules/wmf_audit/BaseAuditProcessor.php b/sites/all/modules/wmf_audit/BaseAuditProcessor.php new file mode 100644 index 0000000..b470f7d --- /dev/null +++ b/sites/all/modules/wmf_audit/BaseAuditProcessor.php @@ -0,0 +1,830 @@ +<?php + +class BaseAuditProcessor { + protected $options; + + public function __construct( $options ) { + $this->options = $options; + // FIXME: Copy to confusing global thing. + wmf_audit_runtime_options( $options ); + } + + protected function get_runtime_options( $name ) { + if ( isset( $this->options[$name] ) ) { + return $this->options[$name]; + } else { + return null; + } + } + + /** + * The main function, intended to be called straight from the drush command and + * nowhere else. + */ + public function run() { + civicrm_initialize(); + + //make sure all the things we need are there. + if (!$this->setup_required_directories()) { + throw new Exception( 'Missing required directories' ); + } + + //find out what the situation is with the available recon files, by date + $recon_files = $this->get_all_recon_files(); + + //get missing transactions from one or more recon files + //let's just assume that the default mode will be to pop off the top three (most recent) at this point. :) + //...Three, because Shut Up. + $count = count($recon_files); + if ($count > 3 && !$this->get_runtime_options('run_all')) { + $count = 3; + } + + $total_missing = array(); + $recon_file_stats = array(); + + for ($i = 0; $i < $count; ++$i) { + $parsed = array(); + $missing = array(); + + //parce the recon files into something relatively reasonable. + $file = array_pop($recon_files); + wmf_audit_echo("Parsing $file"); + $start_time = microtime(true); + $parsed = $this->parse_recon_file( $file ); + $time = microtime(true) - $start_time; + wmf_audit_echo(count($parsed) . " results found in $time seconds\n"); + + //remove transactions we already know about + $start_time = microtime(true); + $missing = $this->get_missing_transactions($parsed); + $recon_file_stats[$file] = wmf_audit_count_missing($missing); + $time = microtime(true) - $start_time; + wmf_audit_echo(wmf_audit_count_missing($missing) . ' missing transactions (of a possible ' . count($parsed) . ") identified in $time seconds\n"); + + //If the file is empty, move it off. + // Note that we are not archiving files that have missing transactions, + // which might be resolved below. Those are archived on the next run, + // once we can confirm they have hit Civi and are no longer missing. + if (wmf_audit_count_missing($missing) <= $this->get_runtime_options('recon_complete_count')) { + wmf_audit_move_completed_recon_file($file, $this->get_recon_completed_dir()); + } + + //grumble... + if (!empty($missing)) { + foreach ($missing as $type => $data) { + if (!empty($missing[$type])) { + if (array_key_exists($type, $total_missing)) { + $total_missing[$type] = array_merge($total_missing[$type], $missing[$type]); + } else { + $total_missing[$type] = $missing[$type]; + } + } + } + } + } + $total_missing_count = wmf_audit_count_missing($total_missing); + wmf_audit_echo("$total_missing_count total missing transactions identified at start"); + + //get the date distribution on what's left... for ***main transactions only*** + //That should be to say: The things that are totally in the payments logs. + //Other things, we will have to look other places, or just rebuild. + $missing_by_date = array(); + if (array_key_exists('main', $total_missing) && !empty($total_missing['main'])) { + foreach ($total_missing['main'] as $record) { + $missing_by_date[$this->get_record_human_date( $record )][] = $record; + } + } + + + $remaining = null; + if (!empty($missing_by_date)) { + $remaining['main'] = $this->log_hunt_and_send($missing_by_date); + } + + //@TODO: Handle the recurring type, once we have a gateway that gives some of those to us. + // + //Handle the negatives now. That way, the parent transactions will probably exist. + wmf_audit_echo("Processing 'negative' transactions"); + $neg_count = 0; + + if (array_key_exists('negative', $total_missing) && !empty($total_missing['negative'])) { + foreach ($total_missing['negative'] as $record) { + //check to see if the parent exists. If it does, normalize and send. + if (worldpay_audit_main_transaction_exists_in_civi($record)) { + $normal = $this->normalize_negative( $record ); + if (wmf_audit_send_transaction($normal, 'negative')) { + $neg_count += 1; + wmf_audit_echo('!'); + } + wmf_audit_echo('X'); + } else { + //@TODO: Some version of makemissing should make these, too. Gar. + $remaining['negative'][$this->get_record_human_date( $record )][] = $record; + wmf_audit_echo('.'); + } + } + wmf_audit_echo("Processed $neg_count 'negative' transactions\n"); + } + + //Wrap it up and put a bow on it. + //@TODO much later: Make a fredge table for these things and dump some messages over there about what we just did. + $missing_main = 0; + $missing_negative = 0; + $missing_recurring = 0; + $missing_at_end = 0; + if (is_array($remaining) && !empty($remaining)) { + foreach ($remaining as $type => $data) { + $count = wmf_audit_count_missing($data); + ${'missing_' . $type} = $count; + $missing_at_end += $count; + } + } + + $wrap_up = "\nDone! Final stats:\n"; + $wrap_up .= "Total missing at start: $total_missing_count\n"; + $wrap_up .= 'Missing at end: ' . $missing_at_end . "\n\n"; + + if ($missing_at_end > 0) { + $wrap_up .= "Missing transaction summary:\n"; + $wrap_up .= "Regular donations: $missing_main\n"; + if ($missing_main > 0) { + foreach ($remaining['main'] as $date => $missing) { + if (count($missing) > 0) { + $wrap_up .= "\t$date: " . count($missing) . "\n"; + } + } + } + $wrap_up .= "Refunds and chargebacks: $missing_negative\n"; + if ($missing_negative > 0) { + foreach ($remaining['negative'] as $date => $missing) { + if (count($missing) > 0) { + $wrap_up .= "\t$date: " . count($missing) . "\n"; + } + } + } + $wrap_up .= "Recurring donations: $missing_recurring\n"; + if ($missing_recurring > 0) { + foreach ($remaining['recurring'] as $date => $missing) { + if (count($missing) > 0) { + $wrap_up .= "\t$date: " . count($missing) . "\n"; + } + } + } + + $wrap_up .= "Transaction IDs:\n"; + foreach ($remaining as $group => $transactions) { + foreach ($transactions as $date => $missing) { + foreach ($missing as $transaction) { + $wrap_up .= "\t" . WmfTransaction::from_message($transaction)->get_unique_id() . "\n"; + } + } + } + + $wrap_up .= 'Initial stats on recon files: ' . print_r($recon_file_stats, true) . "\n"; + } + + wmf_audit_echo($wrap_up); + } + + /** + * Returns an array of the full paths to all valid reconciliation files + * @return array Full paths to all recon files + */ + function get_all_recon_files() { + $files_directory = $this->get_recon_dir(); + //foreach file in the directory, if it matches our pattern, add it to the array. + $files = array(); + if ($handle = opendir($files_directory)) { + while (( $file = readdir($handle) ) !== false) { + if ($this->get_filetype($file) === 'recon') { + $filedate = $this->get_recon_file_date( $file ); //which is not the same thing as the edited date... probably. + $files[$filedate] = $files_directory . $file; + } + } + closedir($handle); + ksort($files); + return $files; + } else { + //can't open the directory at all. Problem. + wmf_audit_log_error("Can't open directory $files_directory", 'FILE_DIR_MISSING'); //should be fatal + } + return false; + } + + /** + * Go transaction hunting in the payments logs. This is by far the most + * processor-intensive part, but we have some timesaving new near-givens to work + * with. + * Things to remember: + * The date on the payments log, probably doesn't contain much of that actual + * date. It's going to be the previous day, mostly. + * Also, remember logrotate exists, so it might be the next day before we get + * the payments log we would be most interested in today. + * + * @param array $missing_by_date An array of all the missing transactions we + * have pulled out of the nightlies, indexed by the standard WMF date format. + * @return mixed An array of transactions we couldn't find or deal with (by + * date), or false on error + */ + function log_hunt_and_send($missing_by_date) { + + if (empty($missing_by_date)) { + wmf_audit_echo(__FUNCTION__ . ': No missing transactions sent to this function. Aborting.'); + return false; + } + + ksort($missing_by_date); + + //output the initial counts for each index... + $earliest = null; + $latest = null; + foreach ($missing_by_date as $date => $data) { + if (is_null($earliest)) { + $earliest = $date; + } + $latest = $date; + wmf_audit_echo($date . " : " . count($data)); + } + wmf_audit_echo("\n"); + + + //REMEMBER: Log date is a liar! + //Stepping backwards, log date really means "Now you have all the data for + //this date, and some from the previous." + //Stepping forwards, it means "Now you have all the data from the previous + //date, and some from the next." + // + //Come up with the full range of logs to grab + //go back the number of days we have configured to search in the past for the + //current gateway + $earliest = wmf_common_date_add_days($earliest, -1 * $this->get_log_days_in_past()); + + //and add one to the latest to compensate for logrotate... unless that's the future. + $today = wmf_common_date_get_today_string(); + $latest = wmf_common_date_add_days($latest, 1); + if ($today < $latest) { + $latest = $today; + } + + //Correct for the date gap function being exclusive on the starting date param + //More explain above. + $earliest -= 1; + + //get the array of all the logs we want to check + $logs_to_grab = wmf_common_date_get_date_gap($earliest, $latest); + + if (empty($logs_to_grab)) { + wmf_audit_log_error(__FUNCTION__ . ': No logs identified as grabbable. Aborting.', 'RUNTIME_ERROR'); + return false; + } + + //want the latest first, from now on. + rsort($logs_to_grab); + krsort($missing_by_date); + + //Foreach log by date DESC, check all the transactions we are missing that might possibly be in this log. + //This is going to look a little funny, because the logs are date-named and stamped after the day they rotate; Not after the dates for all the data in them. + //As such, they mostly contain data for the previous day (but not exclusively, and not all of it) + $tryme = array(); + foreach ($logs_to_grab as $log_date) { + //Add to the pool of what's possible to find in this log, as we step backward through the log dates. + //If the log date is less than or equal to the date on the transaction + //(which may or may not be when it was initiated, but that's the past-iest + //option), and it hasn't already been added to the pool, add it to the pool. + //As we're stepping backward, we should look for transactions that come + //from the current log date, or the one before. + foreach ($missing_by_date as $date => $data) { + if ($date >= ($log_date - 1)) { + if (!array_key_exists($date, $tryme)) { + wmf_audit_echo("Adding date $date to the date pool for log date $log_date"); + $tryme[$date] = $data; + } + } else { + break; + } + } + + //log something sensible out for what we're about to do + $display_dates = array(); + if (!empty($tryme)) { + foreach ($tryme as $date => $thing) { + if (count($thing) > 0) { + $display_dates[$date] = count($thing); + } + } + } + $log = false; + if (!empty($display_dates)) { + $message = "Checking log $log_date for missing transactions that came in with the following dates: "; + foreach ($display_dates as $display_date => $local_count) { + $message .= "\n\t$display_date : $local_count"; + } + wmf_audit_echo($message); + + //now actually check the log from $log_date, for the missing transactions in $tryme + // Get the prepped log with the current date, returning false if it's not there. + $log = $this->get_log_by_date($log_date); + } + + if ($log) { + //check to see if the missing transactions we're trying now, are in there. + //Echochar with results for each one. + foreach ($tryme as $date => $missing) { + if (!empty($missing)) { + wmf_audit_echo("Log Date: $log_date: About to check " . count($missing) . " missing transactions from $date", true); + $checked = 0; + $found = 0; + foreach ($missing as $id => $transaction) { + $checked += 1; + //reset vars used below, for extra safety + $order_id = false; + $data = false; + $all_data = false; + $contribution_tracking_data = false; + $error = false; + + $order_id = $this->get_order_id( $transaction ); + if (!$order_id) { + $error = array( + 'message' => 'Could not get an order id for the following transaction ' . print_r($transaction, true), + 'code' => 'MISSING_MANDATORY_DATA', + ); + } else { + //@TODO: If you ever have a gateway that doesn't communicate with xml, this is going to have to be abstracted slightly. + //Not going to worry about that right now, though. + $data = $this->get_xml_log_data_by_order_id($order_id, $log); + } + + //if we have data at this point, it means we have a match in the logs + if ($data) { + $found += 1; + $all_data = $this->normalize_and_merge_data( $data, $transaction ); + if (!$all_data) { + $error = array( + 'message' => 'Error normalizing data. Skipping the following: ' . print_r($transaction, true) . "\n" . print_r($data, true), + 'code' => 'NORMALIZE_DATA', + ); + } + if (!$error) { + //lookup contribution_tracking data, and fill it in with audit markers if there's nothing there. + $contribution_tracking_data = wmf_audit_get_contribution_tracking_data($all_data); + } + + if (!$contribution_tracking_data) { + $error = array( + 'message' => 'No contribution trackin data retrieved for transaction ' . print_r($all_data, true), + 'code' => 'MISSING_MANDATORY_DATA', + ); + } + + if (!$error) { + //Now that we've made it this far: Easy check to make sure we're even looking at the right thing... + //I'm not totally sure this is going to be the right thing to do, though. Intended fragility. + if ((!$this->get_runtime_options('fakedb')) && + (!empty($contribution_tracking_data['utm_payment_method'])) && + ($contribution_tracking_data['utm_payment_method'] !== $all_data['payment_method'])) { + + $message = 'Payment method mismatch between utm tracking data(' . $contribution_tracking_data['utm_payment_method']; + $message .= ') and normalized log and recon data(' . $all_data['payment_method'] . '). Investigation required.'; + $error = array( + 'message' => $message, + 'code' => 'UTM_DATA_MISMATCH', + ); + } else { + unset($contribution_tracking_data['utm_payment_method']); + // On the next line, the date field from all_data will win, which we totally want. + // I had thought we'd prefer the contribution tracking date, but that's just silly. + // However, I'd just like to point out that it would be terribly enlightening for some gateways to log the difference... + // ...but not inside the char block, because it'll break the pretty. + $all_data = array_merge($contribution_tracking_data, $all_data); + } + + if (!$error) { + //Send to stomp. Or somewhere. Or don't (if it's test mode). + wmf_audit_send_transaction($all_data, 'main'); + unset($tryme[$date][$id]); + wmf_audit_echo('!'); + } + } + + } else { + //no data found in this log, which is expected and normal and not a problem. + wmf_audit_echo('.'); + } + + //End of the transaction search/destroy loop. If we're here and have + //an error, we found something and the re-fusion didn't work. + //Handle consistently, and definitely don't try looking in other + //logs. + if (is_array($error)) { + wmf_audit_log_error($error['message'], $error['code']); + unset($tryme[$date][$id]); + wmf_audit_echo('X'); + } + } + wmf_audit_echo("Log Date: $log_date: Checked $checked missing transactions from $date, and found $found\n"); + } + } + } + } + + //That loop has been stepping back in to the past. So, use what we have... + wmf_audit_remove_old_logs($log_date, $this->read_working_logs_dir()); + + //if we are running in makemissing mode: make the missing transactions. + if ($this->get_runtime_options('makemissing')) { + $missing_count = wmf_audit_count_missing($tryme); + if ($missing_count === 0) { + wmf_audit_echo('No further missing transactions to make.'); + } else { + //today minus three. Again: The three is because Shut Up. + wmf_audit_echo("Making up to $missing_count missing transactions:"); + $made = 0; + $cutoff = wmf_common_date_add_days(wmf_common_date_get_today_string(), -3); + foreach ($tryme as $date => $missing) { + if ((int) $date <= (int) $cutoff) { + foreach ($missing as $id => $message) { + $contribution_tracking_data = wmf_audit_make_contribution_tracking_data($message); + $all_data = array_merge($message, $contribution_tracking_data); + $sendme = $this->normalize_partial( $all_data ); + wmf_audit_send_transaction($sendme, 'main'); + $made += 1; + wmf_audit_echo('!'); + unset($tryme[$date][$id]); + } + } + } + wmf_audit_echo("Made $made missing transactions\n"); + } + } + + return $tryme; //this will contain whatever's left, if we haven't errored out at this point + } + + /** + * Both groom and return a distilled working payments log ready to be searched + * for missing transaction data + * @param string $date The date of the log we want to grab + */ + function get_log_by_date($date) { + //Could be distilled already. + //Could be either in .gz format in the archive + //check for the distilled version first + //check the local static cache to see if the file we want is available in distilled format. + static $ready_files = null; + + if (is_null($ready_files)) { + $ready_files = $this->read_working_logs_dir(); + } + + //simple case: It's already ready, or none are ready + if (!is_null($ready_files) && array_key_exists($date, $ready_files)) { + return $ready_files[$date]; + } + + //This date is not ready yet. Get the zipped version from the archive, unzip + //to the working directory, and distill. + $compressed_filename = $this->get_compressed_log_file_name( $date ); + $full_archive_path = wmf_audit_get_log_archive_dir() . $compressed_filename; + $working_directory = $this->get_working_log_dir(); + $cleanup = array(); //add files we want to make sure aren't there anymore when we're done here. + if (file_exists($full_archive_path)) { + wmf_audit_echo("Retrieving $full_archive_path"); + $cmd = "cp $full_archive_path " . $working_directory; + exec(escapeshellcmd($cmd), $ret, $errorlevel); + if (!file_exists($working_directory . $compressed_filename)) { + wmf_audit_log_error("FILE PROBLEM: Trying to get log archives, and something went wrong with $cmd", 'FILE_MOVE'); + return false; + } else { + $cleanup[] = $working_directory . $compressed_filename; + } + //uncompress + wmf_audit_echo("Gunzipping $working_directory$compressed_filename"); + $cmd = "gunzip -f $working_directory$compressed_filename"; + exec(escapeshellcmd($cmd), $ret, $errorlevel); + //now check to make sure the file you expect, actually exists + $uncompressed_file = $this->get_uncompressed_log_file_name( $date ); + if (!file_exists($working_directory . $uncompressed_file)) { + wmf_audit_log_error("FILE PROBLEM: Something went wrong with uncompressing logs: $cmd : $working_directory.$uncompressed_file doesn't exist.", 'FILE_UNCOMPRESS'); + } else { + $cleanup[] = $working_directory . $uncompressed_file; + } + + //distill & cache locally + $distilled_file = $this->get_working_log_file_name( $date ); + //Can't escape the hard-coded string we're grepping for, because it breaks terribly. + $cmd = "grep '" . $this->get_log_distilling_grep_string() . "' " . escapeshellcmd($working_directory . $uncompressed_file) . " > " . escapeshellcmd($working_directory . $distilled_file); + + wmf_audit_echo($cmd); + $ret = array(); + exec($cmd, $ret, $errorlevel); + chmod($working_directory . $distilled_file, 0770); + $ready_files[$date] = $working_directory . $distilled_file; + + //clean up + if (!empty($cleanup)) { + foreach ($cleanup as $deleteme) { + if (file_exists($deleteme)) { + unlink($deleteme); + } + } + } + + //return + return $working_directory . $distilled_file; + } + + //this happens if the archive file doesn't exist. Definitely not the end of the world, but we should probably log about it. + wmf_audit_log_error("Archive file $full_archive_path seems not to exist\n", 'MISSING_PAYMENTS_LOG'); + return false; + } + + /** + * Construct an array of all the distilled working logs we have in the working + * directory. + * @return array Array of date => full path to file for all distilled working + * logs + */ + function read_working_logs_dir() { + $working_logs = array(); + $working_dir = $this->get_working_log_dir(); + //do the directory read and cache the results in the static + if (!$handle = opendir($working_dir)) { + die(__FUNCTION__ . ": Can't open directory. We should have noticed earlier (in setup_required_directories) that this wasn't going to work. \n"); + } + while (( $file = readdir($handle) ) !== false) { + $temp_date = false; + if ($this->get_filetype($file) === 'working_log') { + $full_path = $working_dir . $file; + $temp_date = $this->get_working_log_file_date( $file ); + } + if (!$temp_date) { + continue; + } + $working_logs[$temp_date] = $full_path; + } + return $working_logs; + } + + /** + * Figures out what type of file you've got there, according to what the gateway + * module has defined in its _regex_for_ functions. + * @param string $file Full path to the file of interest + * @return mixed 'recon'|'working_log'|'uncompressed_log'|'compressed_log'|false + * if it's nothing the gateway recognizes. + */ + function get_filetype($file) { + //we have three types of files, right? compressed, uncompressed, distilled, and recon file. + //...four. Four types. + + $types = array( + 'recon', + 'working_log', + 'uncompressed_log', + 'compressed_log', + ); + + foreach ($types as $type) { + $function_name = 'regex_for_' . $type; + // TODO: map rather than functions + if (preg_match($this->$function_name(), $file)) { + return $type; + } + } + + return false; + } + + /** + * Moves recon files to the completed directory. This should probably only be + * done at the beginning of a run: If we're running in queue flood mode, we + * don't know if the data will actually make it all the way in. + * @param string $file Full path to the file we want to move off + * @return boolean true on success, otherwise false + */ + function move_completed_recon_file($file) { + $files_directory = $this->get_recon_completed_dir(); + $completed_dir = $files_directory; + if (!is_dir($completed_dir)) { + if (!mkdir($completed_dir, 0770)) { + $message = "Could not make $completed_dir"; + wmf_audit_log_error($message, 'FILE_PERMS'); + return false; + } + } + + $filename = basename($file); + $newfile = $completed_dir . '/' . $filename; + + if (!rename($file, $newfile)) { + $message = "Unable to move $file to $newfile"; + + wmf_audit_log_error($message, 'FILE_PERMS'); + return false; + } else { + chmod($newfile, 0770); + } + wmf_audit_echo("Moved $file to $newfile"); + return true; + } + + /** + * Make sure all the directories we need are there. + * @return boolean true on success, otherwise false + */ + function setup_required_directories() { + $directories = array( + 'log_archive' => wmf_audit_get_log_archive_dir(), + 'recon' => $this->get_recon_dir(), + 'log_working' => $this->get_working_log_dir(), + 'recon_completed' => $this->get_recon_completed_dir(), + ); + + foreach ($directories as $id => $dir) { + if (!is_dir($dir)) { + if ($id === 'log_archive' || $id === 'recon') { + //already done. We don't want to try to create these, because required files come from here. + wmf_audit_log_error("Missing required directory $dir", 'MISSING_DIR_' . strtoupper($id)); + return false; + } + + if (!mkdir($dir, 0770)) { + wmf_audit_log_error("Missing and could not create required directory $dir", 'MISSING_DIR_' . strtoupper($id)); + return false; + } + } + } + return true; + } + + /** + * Check the database to see if we have already recorded the transactions + * present in the recon files. + * @param array $transactions An array of transactions we have already parsed + * out from the recon files. + * @return mixed An array of transactions that are not in the database already, + * or false if something goes wrong enough + */ + function get_missing_transactions($transactions) { + if (empty($transactions)) { + wmf_audit_echo(__FUNCTION__ . ': No transactions to find. Returning.'); + return false; + } + //go through the transactions and check to see if they're in civi + //@TODO: RECURRING. Won't matter for WP initially, though, so I'm leaving that for the WX integration phase. + $missing = array( + 'main' => array(), + 'negative' => array(), + 'recurring' => array(), + ); + foreach ($transactions as $transaction) { + if ($this->record_is_refund( $transaction ) || $this->record_is_chargeback( $transaction )) { //negative + if ($this->negative_transaction_exists_in_civi( $transaction ) === false) { + wmf_audit_echo('-'); //add a subtraction. I am the helpfulest comment ever. + $missing['negative'][] = $transaction; + } else { + wmf_audit_echo('.'); + } + } else { //normal type + if ($this->main_transaction_exists_in_civi( $transaction ) === false) { + wmf_audit_echo('!'); + $missing['main'][] = $transaction; + } else { + wmf_audit_echo('.'); + } + } + } + return $missing; + } + + /** + * Visualization helper. Returns the character we want to display for the kind + * of transaction we have just parsed out of a recon file. + * @param array $record A single transaction from a recon file + * @return char A single char to display in the char block. + */ + function audit_echochar($record) { + + if ($this->record_is_refund( $record )) { + return 'r'; + } + + if ($this->record_is_chargeback( $record )) { + return 'b'; + } + + if ($record['payment_method'] === 'cc') { + return 'c'; + } + + echo print_r($record, true); + die(__FUNCTION__ . " Not cc..."); + } + + /** + * Returns supplemental data for the relevant order id from the specified + * payments log, if there is any in there. This is the only sure place we can + * catch data we didn't save, as we're basically grepping for the exact data we + * sent to the gateway. + * It should be noted that this function makes no attempt to normalize the data. + * If this log doesn't contain data for the order_id in question, return false. + * @param string $order_id The order id (transaction id) of the missing payment + * @param string $log The full path to the log we want to search + * @return array|boolean The data we sent to the gateway for that order id, or + * false if we can't find it there. + */ + function get_xml_log_data_by_order_id($order_id, $log) { + if (!$order_id) { + return false; + } + + $cmd = 'grep ' . $this->get_log_line_grep_string( $order_id ) . ' ' . $log; + wmf_audit_echo(__FUNCTION__ . ' ' . $cmd, true); + + $ret = array(); + exec(escapeshellcmd($cmd), $ret, $errorlevel); + + if (count($ret) > 0) { + + //In this wonderful new world, we only expect one line. + if (count($ret) > 1) { + wmf_audit_echo("Odd: More than one logline returned for $order_id. Investigation Required."); + } + + //just take the last one, just in case somebody did manage to do a duplicate. + $line = $ret[count($ret) - 1]; + // $linedata for *everything* from payments goes Month, day, time, box, bucket, CTID:OID, absolute madness with lots of unpredictable spaces. + $linedata = explode(' ', $line); + $contribution_id = explode(':', $linedata[5]); + $contribution_id = $contribution_id[0]; + + + //look for the raw xml + $full_xml = false; + $node = $this->get_log_line_xml_outermost_node(); + $xmlstart = strpos($line, '<?xml'); + if ($xmlstart === false) { + $xmlstart = strpos($line, "<$node>"); + } + $xmlend = strpos($line, "</$node>"); + if ($xmlend) { + $full_xml = true; + $xmlend += (strlen($node) + 3); + $xml = substr($line, $xmlstart, $xmlend - $xmlstart); + } else { + //this is a broken line, and it won't load... but we can still parse what's left of the thing, the slow way. + $xml = substr($line, $xmlstart); + } + // Syslog wart. Other control characters should be encoded normally. + $xml = str_replace( '#012', "\n", $xml ); + + $donor_data = array(); + + if ($full_xml) { + $xmlobj = new DomDocument; + $xmlobj->loadXML($xml); + + $parent_nodes = $this->get_log_line_xml_parent_nodes(); + + if (empty($parent_nodes)) { + wmf_audit_log_error(__FUNCTION__ . ': No parent nodes defined. Can not continue.', 'RUNTIME_ERROR'); + die("Stop dying here"); + } + + foreach ($parent_nodes as $parent_node) { + foreach ($xmlobj->getElementsByTagName($parent_node) as $node) { + foreach ($node->childNodes as $childnode) { + if (trim($childnode->nodeValue) != '') { + $donor_data[$childnode->nodeName] = $childnode->nodeValue; + } + } + } + } + } else { + //the XML got cut off prematurely, probably because syslog was set up to truncate on payments. + //rebuild what we can the old-fashioned way. + $search_for_nodes = $this->get_log_line_xml_data_nodes(); + + if (empty($search_for_nodes)) { + wmf_audit_log_error(__FUNCTION__ . ': No searchable nodes defined. Can not continue.', 'RUNTIME_ERROR'); + die("Stop dying here"); + } + foreach ($search_for_nodes as $node) { + $tmp = wmf_audit_get_partial_xml_node_value($node, $xml); + if (!is_null($tmp)) { + $donor_data[$node] = $tmp; + } + } + } + + if (!empty($donor_data)) { + $donor_data['contribution_tracking_id'] = $contribution_id; + return $donor_data; + } else { + wmf_audit_log_error("We found a transaction in the logs for $order_id, but there's nothing left after we tried to grab its data. Investigation required.", 'DATA_WEIRD'); + } + } + return false; //no big deal, it just wasn't there. This will happen most of the time. + } +} diff --git a/sites/all/modules/wmf_audit/wmf_audit.info b/sites/all/modules/wmf_audit/wmf_audit.info index d82630a..3c1b3d5 100644 --- a/sites/all/modules/wmf_audit/wmf_audit.info +++ b/sites/all/modules/wmf_audit/wmf_audit.info @@ -7,3 +7,4 @@ dependencies[] = wmf_civicrm package = WMF Audit configure = admin/config/wmf_audit +files[] = BaseAuditProcessor.php diff --git a/sites/all/modules/wmf_audit/wmf_audit.module b/sites/all/modules/wmf_audit/wmf_audit.module index adff527..f40704c 100644 --- a/sites/all/modules/wmf_audit/wmf_audit.module +++ b/sites/all/modules/wmf_audit/wmf_audit.module @@ -65,194 +65,6 @@ } /** - * The main function, intended to be called straight from the drush command and - * nowhere else. - * @param string $submod_prefix The prefix for the gateway-specific submodule - * for which we are running the nightly audit. For any of this to work, there - * will have to be a submodule called $submod_prefix . '_audit'. - * @param array $drush_params An array of the drush parameters that were set at - * the command line. - * @return boolean - */ -function wmf_audit_main($submod_prefix, $drush_params) { - civicrm_initialize(); - - //this is more of a runtime param... maybe set it on the outside? But no, I - //kind of like that this one thing is explicitly required in an obvious way to - //even call the function. - $drush_params['submod_prefix'] = $submod_prefix; - - //This function is weird. Because we're passing in an array, it's setting the - //runtime params here. - //Everywhere else, it will work as a get function. - wmf_audit_runtime_options($drush_params); - - - //make sure all the things we need are there. - if (!wmf_audit_setup_required_directories()) { - return false; - } - - //find out what the situation is with the available recon files, by date - $recon_files = wmf_audit_get_all_recon_files(); - - //get missing transactions from one or more recon files - //let's just assume that the default mode will be to pop off the top three (most recent) at this point. :) - //...Three, because Shut Up. - $count = count($recon_files); - if ($count > 3 && !wmf_audit_runtime_options('run_all')) { - $count = 3; - } - - $total_missing = array(); - $recon_file_stats = array(); - - for ($i = 0; $i < $count; ++$i) { - $parsed = array(); - $missing = array(); - - //parce the recon files into something relatively reasonable. - $file = array_pop($recon_files); - wmf_audit_echo("Parsing $file"); - $start_time = microtime(true); - $parsed = wmfa_execute('parse_recon_file', array('file' => $file)); - $time = microtime(true) - $start_time; - wmf_audit_echo(count($parsed) . " results found in $time seconds\n"); - - //remove transactions we already know about - $start_time = microtime(true); - $missing = wmf_audit_get_missing_transactions($parsed); - $recon_file_stats[$file] = wmf_audit_count_missing($missing); - $time = microtime(true) - $start_time; - wmf_audit_echo(wmf_audit_count_missing($missing) . ' missing transactions (of a possible ' . count($parsed) . ") identified in $time seconds\n"); - - //If the file is empty, move it off. - // Note that we are not archiving files that have missing transactions, - // which might be resolved below. Those are archived on the next run, - // once we can confirm they have hit Civi and are no longer missing. - if (wmf_audit_count_missing($missing) <= wmf_audit_runtime_options('recon_complete_count')) { - wmf_audit_move_completed_recon_file($file); - } - - //grumble... - if (!empty($missing)) { - foreach ($missing as $type => $data) { - if (!empty($missing[$type])) { - if (array_key_exists($type, $total_missing)) { - $total_missing[$type] = array_merge($total_missing[$type], $missing[$type]); - } else { - $total_missing[$type] = $missing[$type]; - } - } - } - } - } - $total_missing_count = wmf_audit_count_missing($total_missing); - wmf_audit_echo("$total_missing_count total missing transactions identified at start"); - - //get the date distribution on what's left... for ***main transactions only*** - //That should be to say: The things that are totally in the payments logs. - //Other things, we will have to look other places, or just rebuild. - $missing_by_date = array(); - if (array_key_exists('main', $total_missing) && !empty($total_missing['main'])) { - foreach ($total_missing['main'] as $record) { - $missing_by_date[wmfa_execute('get_record_human_date', array('record' => $record))][] = $record; - } - } - - - $remaining = null; - if (!empty($missing_by_date)) { - $remaining['main'] = wmf_audit_log_hunt_and_send($missing_by_date); - } - - //@TODO: Handle the recurring type, once we have a gateway that gives some of those to us. - // - //Handle the negatives now. That way, the parent transactions will probably exist. - wmf_audit_echo("Processing 'negative' transactions"); - $neg_count = 0; - - if (array_key_exists('negative', $total_missing) && !empty($total_missing['negative'])) { - foreach ($total_missing['negative'] as $record) { - //check to see if the parent exists. If it does, normalize and send. - if (worldpay_audit_main_transaction_exists_in_civi($record)) { - $normal = wmfa_execute('normalize_negative', array('record' => $record)); - if (wmf_audit_send_transaction($normal, 'negative')) { - $neg_count += 1; - wmf_audit_echo('!'); - } - wmf_audit_echo('X'); - } else { - //@TODO: Some version of makemissing should make these, too. Gar. - $remaining['negative'][wmfa_execute('get_record_human_date', array('record' => $record))][] = $record; - wmf_audit_echo('.'); - } - } - wmf_audit_echo("Processed $neg_count 'negative' transactions\n"); - } - - //Wrap it up and put a bow on it. - //@TODO much later: Make a fredge table for these things and dump some messages over there about what we just did. - $missing_main = 0; - $missing_negative = 0; - $missing_recurring = 0; - $missing_at_end = 0; - if (is_array($remaining) && !empty($remaining)) { - foreach ($remaining as $type => $data) { - $count = wmf_audit_count_missing($data); - ${'missing_' . $type} = $count; - $missing_at_end += $count; - } - } - - $wrap_up = "\nDone! Final stats:\n"; - $wrap_up .= "Total missing at start: $total_missing_count\n"; - $wrap_up .= 'Missing at end: ' . $missing_at_end . "\n\n"; - - if ($missing_at_end > 0) { - $wrap_up .= "Missing transaction summary:\n"; - $wrap_up .= "Regular donations: $missing_main\n"; - if ($missing_main > 0) { - foreach ($remaining['main'] as $date => $missing) { - if (count($missing) > 0) { - $wrap_up .= "\t$date: " . count($missing) . "\n"; - } - } - } - $wrap_up .= "Refunds and chargebacks: $missing_negative\n"; - if ($missing_negative > 0) { - foreach ($remaining['negative'] as $date => $missing) { - if (count($missing) > 0) { - $wrap_up .= "\t$date: " . count($missing) . "\n"; - } - } - } - $wrap_up .= "Recurring donations: $missing_recurring\n"; - if ($missing_recurring > 0) { - foreach ($remaining['recurring'] as $date => $missing) { - if (count($missing) > 0) { - $wrap_up .= "\t$date: " . count($missing) . "\n"; - } - } - } - - $wrap_up .= "Transaction IDs:\n"; - foreach ($remaining as $group => $transactions) { - foreach ($transactions as $date => $missing) { - foreach ($missing as $transaction) { - $wrap_up .= "\t" . WmfTransaction::from_message($transaction)->get_unique_id() . "\n"; - } - } - } - - $wrap_up .= 'Initial stats on recon files: ' . print_r($recon_file_stats, true) . "\n"; - } - - wmf_audit_echo($wrap_up); - return true; -} - -/** * Counts the missing transactions in the main array of missing transactions. * This is annoying and needed its own function, because the $missing array goes * $missing[$type]=> array of transactions. @@ -273,366 +85,13 @@ } /** - * Returns an array of the full paths to all valid reconciliation files - * @return array Full paths to all recon files - */ -function wmf_audit_get_all_recon_files() { - $files_directory = wmfa_execute('get_recon_dir'); - //foreach file in the directory, if it matches our pattern, add it to the array. - $files = array(); - if ($handle = opendir($files_directory)) { - while (( $file = readdir($handle) ) !== false) { - if (wmf_audit_get_filetype($file) === 'recon') { - $filedate = wmfa_execute('get_recon_file_date', array($file)); //which is not the same thing as the edited date... probably. - $files[$filedate] = $files_directory . $file; - } - } - closedir($handle); - ksort($files); - return $files; - } else { - //can't open the directory at all. Problem. - wmf_audit_log_error("Can't open directory $files_directory", 'FILE_DIR_MISSING'); //should be fatal - } - return false; -} - -/** - * Go transaction hunting in the payments logs. This is by far the most - * processor-intensive part, but we have some timesaving new near-givens to work - * with. - * Things to remember: - * The date on the payments log, probably doesn't contain much of that actual - * date. It's going to be the previous day, mostly. - * Also, remember logrotate exists, so it might be the next day before we get - * the payments log we would be most interested in today. - * - * @param array $missing_by_date An array of all the missing transactions we - * have pulled out of the nightlies, indexed by the standard WMF date format. - * @return mixed An array of transactions we couldn't find or deal with (by - * date), or false on error - */ -function wmf_audit_log_hunt_and_send($missing_by_date) { - - if (empty($missing_by_date)) { - wmf_audit_echo(__FUNCTION__ . ': No missing transactions sent to this function. Aborting.'); - return false; - } - - ksort($missing_by_date); - - //output the initial counts for each index... - $earliest = null; - $latest = null; - foreach ($missing_by_date as $date => $data) { - if (is_null($earliest)) { - $earliest = $date; - } - $latest = $date; - wmf_audit_echo($date . " : " . count($data)); - } - wmf_audit_echo("\n"); - - - //REMEMBER: Log date is a liar! - //Stepping backwards, log date really means "Now you have all the data for - //this date, and some from the previous." - //Stepping forwards, it means "Now you have all the data from the previous - //date, and some from the next." - // - //Come up with the full range of logs to grab - //go back the number of days we have configured to search in the past for the - //current gateway - $earliest = wmf_common_date_add_days($earliest, -1 * wmfa_execute('get_log_days_in_past')); - - //and add one to the latest to compensate for logrotate... unless that's the future. - $today = wmf_common_date_get_today_string(); - $latest = wmf_common_date_add_days($latest, 1); - if ($today < $latest) { - $latest = $today; - } - - //Correct for the date gap function being exclusive on the starting date param - //More explain above. - $earliest -= 1; - - //get the array of all the logs we want to check - $logs_to_grab = wmf_common_date_get_date_gap($earliest, $latest); - - if (empty($logs_to_grab)) { - wmf_audit_log_error(__FUNCTION__ . ': No logs identified as grabbable. Aborting.', 'RUNTIME_ERROR'); - return false; - } - - //want the latest first, from now on. - rsort($logs_to_grab); - krsort($missing_by_date); - - //Foreach log by date DESC, check all the transactions we are missing that might possibly be in this log. - //This is going to look a little funny, because the logs are date-named and stamped after the day they rotate; Not after the dates for all the data in them. - //As such, they mostly contain data for the previous day (but not exclusively, and not all of it) - $tryme = array(); - foreach ($logs_to_grab as $log_date) { - //Add to the pool of what's possible to find in this log, as we step backward through the log dates. - //If the log date is less than or equal to the date on the transaction - //(which may or may not be when it was initiated, but that's the past-iest - //option), and it hasn't already been added to the pool, add it to the pool. - //As we're stepping backward, we should look for transactions that come - //from the current log date, or the one before. - foreach ($missing_by_date as $date => $data) { - if ($date >= ($log_date - 1)) { - if (!array_key_exists($date, $tryme)) { - wmf_audit_echo("Adding date $date to the date pool for log date $log_date"); - $tryme[$date] = $data; - } - } else { - break; - } - } - - //log something sensible out for what we're about to do - $display_dates = array(); - if (!empty($tryme)) { - foreach ($tryme as $date => $thing) { - if (count($thing) > 0) { - $display_dates[$date] = count($thing); - } - } - } - $log = false; - if (!empty($display_dates)) { - $message = "Checking log $log_date for missing transactions that came in with the following dates: "; - foreach ($display_dates as $display_date => $local_count) { - $message .= "\n\t$display_date : $local_count"; - } - wmf_audit_echo($message); - - //now actually check the log from $log_date, for the missing transactions in $tryme - // Get the prepped log with the current date, returning false if it's not there. - $log = wmf_audit_get_log_by_date($log_date); - } - - if ($log) { - //check to see if the missing transactions we're trying now, are in there. - //Echochar with results for each one. - foreach ($tryme as $date => $missing) { - if (!empty($missing)) { - wmf_audit_echo("Log Date: $log_date: About to check " . count($missing) . " missing transactions from $date", true); - $checked = 0; - $found = 0; - foreach ($missing as $id => $transaction) { - $checked += 1; - //reset vars used below, for extra safety - $order_id = false; - $data = false; - $all_data = false; - $contribution_tracking_data = false; - $error = false; - - $order_id = wmfa_execute('get_order_id', array('transaction' => $transaction)); - if (!$order_id) { - $error = array( - 'message' => 'Could not get an order id for the following transaction ' . print_r($transaction, true), - 'code' => 'MISSING_MANDATORY_DATA', - ); - } else { - //@TODO: If you ever have a gateway that doesn't communicate with xml, this is going to have to be abstracted slightly. - //Not going to worry about that right now, though. - $data = wmf_audit_get_xml_log_data_by_order_id($order_id, $log); - } - - //if we have data at this point, it means we have a match in the logs - if ($data) { - $found += 1; - $all_data = wmfa_execute('normalize_and_merge_data', array('xml_data' => $data, 'recon_data' => $transaction)); - if (!$all_data) { - $error = array( - 'message' => 'Error normalizing data. Skipping the following: ' . print_r($transaction, true) . "\n" . print_r($data, true), - 'code' => 'NORMALIZE_DATA', - ); - } - if (!$error) { - //lookup contribution_tracking data, and fill it in with audit markers if there's nothing there. - $contribution_tracking_data = wmf_audit_get_contribution_tracking_data($all_data); - } - - if (!$contribution_tracking_data) { - $error = array( - 'message' => 'No contribution trackin data retrieved for transaction ' . print_r($all_data, true), - 'code' => 'MISSING_MANDATORY_DATA', - ); - } - - if (!$error) { - //Now that we've made it this far: Easy check to make sure we're even looking at the right thing... - //I'm not totally sure this is going to be the right thing to do, though. Intended fragility. - if ((!wmf_audit_runtime_options('fakedb')) && - (!empty($contribution_tracking_data['utm_payment_method'])) && - ($contribution_tracking_data['utm_payment_method'] !== $all_data['payment_method'])) { - - $message = 'Payment method mismatch between utm tracking data(' . $contribution_tracking_data['utm_payment_method']; - $message .= ') and normalized log and recon data(' . $all_data['payment_method'] . '). Investigation required.'; - $error = array( - 'message' => $message, - 'code' => 'UTM_DATA_MISMATCH', - ); - } else { - unset($contribution_tracking_data['utm_payment_method']); - // On the next line, the date field from all_data will win, which we totally want. - // I had thought we'd prefer the contribution tracking date, but that's just silly. - // However, I'd just like to point out that it would be terribly enlightening for some gateways to log the difference... - // ...but not inside the char block, because it'll break the pretty. - $all_data = array_merge($contribution_tracking_data, $all_data); - } - - if (!$error) { - //Send to stomp. Or somewhere. Or don't (if it's test mode). - wmf_audit_send_transaction($all_data, 'main'); - unset($tryme[$date][$id]); - wmf_audit_echo('!'); - } - } - - } else { - //no data found in this log, which is expected and normal and not a problem. - wmf_audit_echo('.'); - } - - //End of the transaction search/destroy loop. If we're here and have - //an error, we found something and the re-fusion didn't work. - //Handle consistently, and definitely don't try looking in other - //logs. - if (is_array($error)) { - wmf_audit_log_error($error['message'], $error['code']); - unset($tryme[$date][$id]); - wmf_audit_echo('X'); - } - } - wmf_audit_echo("Log Date: $log_date: Checked $checked missing transactions from $date, and found $found\n"); - } - } - } - } - - //That loop has been stepping back in to the past. So, use what we have... - wmf_audit_remove_old_logs($log_date); - - //if we are running in makemissing mode: make the missing transactions. - if (wmf_audit_runtime_options('makemissing')) { - $missing_count = wmf_audit_count_missing($tryme); - if ($missing_count === 0) { - wmf_audit_echo('No further missing transactions to make.'); - } else { - //today minus three. Again: The three is because Shut Up. - wmf_audit_echo("Making up to $missing_count missing transactions:"); - $made = 0; - $cutoff = wmf_common_date_add_days(wmf_common_date_get_today_string(), -3); - foreach ($tryme as $date => $missing) { - if ((int) $date <= (int) $cutoff) { - foreach ($missing as $id => $message) { - $contribution_tracking_data = wmf_audit_make_contribution_tracking_data($message); - $all_data = array_merge($message, $contribution_tracking_data); - $sendme = wmfa_execute('normalize_partial', array('record' => $all_data)); - wmf_audit_send_transaction($sendme, 'main'); - $made += 1; - wmf_audit_echo('!'); - unset($tryme[$date][$id]); - } - } - } - wmf_audit_echo("Made $made missing transactions\n"); - } - } - - return $tryme; //this will contain whatever's left, if we haven't errored out at this point -} - -/** - * Both groom and return a distilled working payments log ready to be searched - * for missing transaction data - * @param string $date The date of the log we want to grab - */ -function wmf_audit_get_log_by_date($date) { - //Could be distilled already. - //Could be either in .gz format in the archive - //check for the distilled version first - //check the local static cache to see if the file we want is available in distilled format. - static $ready_files = null; - - if (is_null($ready_files)) { - $ready_files = wmf_audit_read_working_logs_dir(); - } - - //simple case: It's already ready, or none are ready - if (!is_null($ready_files) && array_key_exists($date, $ready_files)) { - return $ready_files[$date]; - } - - //This date is not ready yet. Get the zipped version from the archive, unzip - //to the working directory, and distill. - $compressed_filename = wmfa_execute('get_compressed_log_file_name', array('date' => $date)); - $full_archive_path = wmf_audit_get_log_archive_dir() . $compressed_filename; - $working_directory = wmfa_execute('get_working_log_dir'); - $cleanup = array(); //add files we want to make sure aren't there anymore when we're done here. - if (file_exists($full_archive_path)) { - wmf_audit_echo("Retrieving $full_archive_path"); - $cmd = "cp $full_archive_path " . $working_directory; - exec(escapeshellcmd($cmd), $ret, $errorlevel); - if (!file_exists($working_directory . $compressed_filename)) { - wmf_audit_log_error("FILE PROBLEM: Trying to get log archives, and something went wrong with $cmd", 'FILE_MOVE'); - return false; - } else { - $cleanup[] = $working_directory . $compressed_filename; - } - //uncompress - wmf_audit_echo("Gunzipping $working_directory$compressed_filename"); - $cmd = "gunzip -f $working_directory$compressed_filename"; - exec(escapeshellcmd($cmd), $ret, $errorlevel); - //now check to make sure the file you expect, actually exists - $uncompressed_file = wmfa_execute('get_uncompressed_log_file_name', array('date' => $date)); - if (!file_exists($working_directory . $uncompressed_file)) { - wmf_audit_log_error("FILE PROBLEM: Something went wrong with uncompressing logs: $cmd : $working_directory.$uncompressed_file doesn't exist.", 'FILE_UNCOMPRESS'); - } else { - $cleanup[] = $working_directory . $uncompressed_file; - } - - //distill & cache locally - $distilled_file = wmfa_execute('get_working_log_file_name', array('date' => $date)); - //Can't escape the hard-coded string we're grepping for, because it breaks terribly. - $cmd = "grep '" . wmfa_execute('get_log_distilling_grep_string') . "' " . escapeshellcmd($working_directory . $uncompressed_file) . " > " . escapeshellcmd($working_directory . $distilled_file); - - wmf_audit_echo($cmd); - $ret = array(); - exec($cmd, $ret, $errorlevel); - chmod($working_directory . $distilled_file, 0770); - $ready_files[$date] = $working_directory . $distilled_file; - - //clean up - if (!empty($cleanup)) { - foreach ($cleanup as $deleteme) { - if (file_exists($deleteme)) { - unlink($deleteme); - } - } - } - - //return - return $working_directory . $distilled_file; - } - - //this happens if the archive file doesn't exist. Definitely not the end of the world, but we should probably log about it. - wmf_audit_log_error("Archive file $full_archive_path seems not to exist\n", 'MISSING_PAYMENTS_LOG'); - return false; -} - -/** * Remove all distilled logs older than the oldest date ($date) * Not even a big deal if we overshoot and remove too many, because we'll just * remake them next time if they're missing. * @param string $date The date string for the oldest log we want to keep + * @param string $working_logs path to working logs directory */ -function wmf_audit_remove_old_logs($date) { - $working_logs = wmf_audit_read_working_logs_dir(); +function wmf_audit_remove_old_logs($date, $working_logs) { if (!empty($working_logs)) { foreach ($working_logs as $logdate => $file) { if ((int) $logdate < (int) $date) { @@ -643,70 +102,14 @@ } /** - * Construct an array of all the distilled working logs we have in the working - * directory. - * @return array Array of date => full path to file for all distilled working - * logs - */ -function wmf_audit_read_working_logs_dir() { - $working_logs = array(); - $working_dir = wmfa_execute('get_working_log_dir'); - //do the directory read and cache the results in the static - if (!$handle = opendir($working_dir)) { - die(__FUNCTION__ . ": Can't open directory. We should have noticed earlier (in setup_required_directories) that this wasn't going to work. \n"); - } - while (( $file = readdir($handle) ) !== false) { - $temp_date = false; - if (wmf_audit_get_filetype($file) === 'working_log') { - $full_path = $working_dir . $file; - $temp_date = wmfa_execute('get_working_log_file_date', array('file' => $file)); //($file); - } - if (!$temp_date) { - continue; - } - $working_logs[$temp_date] = $full_path; - } - return $working_logs; -} - -/** - * Figures out what type of file you've got there, according to what the gateway - * module has defined in its _regex_for_ functions. - * @param string $file Full path to the file of interest - * @return mixed 'recon'|'working_log'|'uncompressed_log'|'compressed_log'|false - * if it's nothing the gateway recognizes. - */ -function wmf_audit_get_filetype($file) { - //we have three types of files, right? compressed, uncompressed, distilled, and recon file. - //...four. Four types. - - $types = array( - 'recon', - 'working_log', - 'uncompressed_log', - 'compressed_log', - ); - - foreach ($types as $type) { - $function_name = 'regex_for_' . $type; - if (preg_match(wmfa_execute($function_name), $file)) { - return $type; - } - } - - return false; -} - -/** * Moves recon files to the completed directory. This should probably only be * done at the beginning of a run: If we're running in queue flood mode, we * don't know if the data will actually make it all the way in. * @param string $file Full path to the file we want to move off + * @param string $completed_dir Path to the recon completed directory. * @return boolean true on success, otherwise false */ -function wmf_audit_move_completed_recon_file($file) { - $files_directory = wmfa_execute('get_recon_completed_dir'); - $completed_dir = $files_directory; +function wmf_audit_move_completed_recon_file($file, $completed_dir) { if (!is_dir($completed_dir)) { if (!mkdir($completed_dir, 0770)) { $message = "Could not make $completed_dir"; @@ -728,121 +131,6 @@ } wmf_audit_echo("Moved $file to $newfile"); return true; -} - -/** - * Make sure all the directories we need are there. - * @return boolean true on success, otherwise false - */ -function wmf_audit_setup_required_directories() { - $directories = array( - 'log_archive' => wmf_audit_get_log_archive_dir(), - 'recon' => wmfa_execute('get_recon_dir'), - 'log_working' => wmfa_execute('get_working_log_dir'), - 'recon_completed' => wmfa_execute('get_recon_completed_dir'), - ); - - foreach ($directories as $id => $dir) { - if (!is_dir($dir)) { - if ($id === 'log_archive' || $id === 'recon') { - //already done. We don't want to try to create these, because required files come from here. - wmf_audit_log_error("Missing required directory $dir", 'MISSING_DIR_' . strtoupper($id)); - return false; - } - - if (!mkdir($dir, 0770)) { - wmf_audit_log_error("Missing and could not create required directory $dir", 'MISSING_DIR_' . strtoupper($id)); - return false; - } - } - } - return true; -} - -/** - * Executes a function that is expected to exist in the relevant submodule - * defined at runtime. Fails angrily if that function doesn't exist. - * @param string $function_name_base Everything after the submodule . '_audit_' - * in the function you're trying to call. - * @param array $parameters The parameters you want to send to the function, in - * array format. - * @return mixed The return value of the specified function. - */ -function wmfa_execute($function_name_base, $parameters = array()) { - $submod_prefix = wmf_audit_runtime_options('submod_prefix'); - $function_name = $submod_prefix . '_audit_' . $function_name_base; - if (function_exists($function_name)) { - return call_user_func_array($function_name, $parameters); - } else { - //serious issues, you have. - wmf_audit_log_error("Missing required function $function_name", 'MISSING_FUNCTION'); //should be fatal - die(); //this really should die here. For real, though. - } - return false; //no way to get here with all the dying -} - -/** - * Check the database to see if we have already recorded the transactions - * present in the recon files. - * @param array $transactions An array of transactions we have already parsed - * out from the recon files. - * @return mixed An array of transactions that are not in the database already, - * or false if something goes wrong enough - */ -function wmf_audit_get_missing_transactions($transactions) { - if (empty($transactions)) { - wmf_audit_echo(__FUNCTION__ . ': No transactions to find. Returning.'); - return false; - } - //go through the transactions and check to see if they're in civi - //@TODO: RECURRING. Won't matter for WP initially, though, so I'm leaving that for the WX integration phase. - $missing = array( - 'main' => array(), - 'negative' => array(), - 'recurring' => array(), - ); - foreach ($transactions as $transaction) { - if (wmfa_execute('record_is_refund', array('record' => $transaction)) || wmfa_execute('record_is_chargeback', array('record' => $transaction))) { //negative - if (wmfa_execute('negative_transaction_exists_in_civi', array('transaction' => $transaction)) === false) { - wmf_audit_echo('-'); //add a subtraction. I am the helpfulest comment ever. - $missing['negative'][] = $transaction; - } else { - wmf_audit_echo('.'); - } - } else { //normal type - if (wmfa_execute('main_transaction_exists_in_civi', array('transaction' => $transaction)) === false) { - wmf_audit_echo('!'); - $missing['main'][] = $transaction; - } else { - wmf_audit_echo('.'); - } - } - } - return $missing; -} - -/** - * Visualization helper. Returns the character we want to display for the kind - * of transaction we have just parsed out of a recon file. - * @param array $record A single transaction from a recon file - * @return char A single char to display in the char block. - */ -function wmf_audit_echochar($record) { - - if (wmfa_execute('record_is_refund', array('record' => $record))) { - return 'r'; - } - - if (wmfa_execute('record_is_chargeback', array('record' => $record))) { - return 'b'; - } - - if ($record['payment_method'] === 'cc') { - return 'c'; - } - - echo print_r($record, true); - die(__FUNCTION__ . " Not cc..."); } /** @@ -892,6 +180,7 @@ * A confusing function for holding local runtime vars without having to goof * around with globals. Works as both get and set, depending on what you pass in * with $confusing_thing + * TODO: Stop that. Some vars should be global, most should be private to the audit class. * @staticvar array $args The arguments set at runtime via drush command * @param array|string|null $confusing_thing A confusing thing. * * When it is an array, sets the internal static $args variable to that array @@ -936,6 +225,7 @@ /** * Returns an array of errors that should not cause the script to blow up, but * which will probably still cause messages to get thrown out. + * TODO: Use WmfException instead. * @return boolean true if the error code is fatal, otherwise false. */ function wmf_audit_error_isfatal($error) { @@ -958,113 +248,6 @@ } else { return true; } -} - - -/** - * Returns supplemental data for the relevant order id from the specified - * payments log, if there is any in there. This is the only sure place we can - * catch data we didn't save, as we're basically grepping for the exact data we - * sent to the gateway. - * It should be noted that this function makes no attempt to normalize the data. - * If this log doesn't contain data for the order_id in question, return false. - * @param string $order_id The order id (transaction id) of the missing payment - * @param string $log The full path to the log we want to search - * @return array|boolean The data we sent to the gateway for that order id, or - * false if we can't find it there. - */ -function wmf_audit_get_xml_log_data_by_order_id($order_id, $log) { - if (!$order_id) { - return false; - } - - $cmd = 'grep ' . wmfa_execute('get_log_line_grep_string', array('order_id' => $order_id)) . ' ' . $log; - wmf_audit_echo(__FUNCTION__ . ' ' . $cmd, true); - - $ret = array(); - exec(escapeshellcmd($cmd), $ret, $errorlevel); - - if (count($ret) > 0) { - - //In this wonderful new world, we only expect one line. - if (count($ret) > 1) { - wmf_audit_echo("Odd: More than one logline returned for $order_id. Investigation Required."); - } - - //just take the last one, just in case somebody did manage to do a duplicate. - $line = $ret[count($ret) - 1]; - // $linedata for *everything* from payments goes Month, day, time, box, bucket, CTID:OID, absolute madness with lots of unpredictable spaces. - $linedata = explode(' ', $line); - $contribution_id = explode(':', $linedata[5]); - $contribution_id = $contribution_id[0]; - - - //look for the raw xml - $full_xml = false; - $node = wmfa_execute('get_log_line_xml_outermost_node'); - $xmlstart = strpos($line, '<?xml'); - if ($xmlstart === false) { - $xmlstart = strpos($line, "<$node>"); - } - $xmlend = strpos($line, "</$node>"); - if ($xmlend) { - $full_xml = true; - $xmlend += (strlen($node) + 3); - $xml = substr($line, $xmlstart, $xmlend - $xmlstart); - } else { - //this is a broken line, and it won't load... but we can still parse what's left of the thing, the slow way. - $xml = substr($line, $xmlstart); - } - // Syslog wart. Other control characters should be encoded normally. - $xml = str_replace( '#012', "\n", $xml ); - - $donor_data = array(); - - if ($full_xml) { - $xmlobj = new DomDocument; - $xmlobj->loadXML($xml); - - $parent_nodes = wmfa_execute('get_log_line_xml_parent_nodes'); - - if (empty($parent_nodes)) { - wmf_audit_log_error(__FUNCTION__ . ': No parent nodes defined. Can not continue.', 'RUNTIME_ERROR'); - die("Stop dying here"); - } - - foreach ($parent_nodes as $parent_node) { - foreach ($xmlobj->getElementsByTagName($parent_node) as $node) { - foreach ($node->childNodes as $childnode) { - if (trim($childnode->nodeValue) != '') { - $donor_data[$childnode->nodeName] = $childnode->nodeValue; - } - } - } - } - } else { - //the XML got cut off prematurely, probably because syslog was set up to truncate on payments. - //rebuild what we can the old-fashioned way. - $search_for_nodes = wmfa_execute('get_log_line_xml_data_nodes'); - - if (empty($search_for_nodes)) { - wmf_audit_log_error(__FUNCTION__ . ': No searchable nodes defined. Can not continue.', 'RUNTIME_ERROR'); - die("Stop dying here"); - } - foreach ($search_for_nodes as $node) { - $tmp = wmf_audit_get_partial_xml_node_value($node, $xml); - if (!is_null($tmp)) { - $donor_data[$node] = $tmp; - } - } - } - - if (!empty($donor_data)) { - $donor_data['contribution_tracking_id'] = $contribution_id; - return $donor_data; - } else { - wmf_audit_log_error("We found a transaction in the logs for $order_id, but there's nothing left after we tried to grab its data. Investigation required.", 'DATA_WEIRD'); - } - } - return false; //no big deal, it just wasn't there. This will happen most of the time. } /** @@ -1097,7 +280,6 @@ $value = substr($xml, $valstart, $valend - $valstart); return $value; } - /** * Returns the contribution tracking data for $record, if we can find it. -- To view, visit https://gerrit.wikimedia.org/r/220714 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I60da6b9ca7f542a32cf01ff8446ac7e68f866e6e Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/fundraising/crm Gerrit-Branch: master Gerrit-Owner: Awight <awi...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits