#!/usr/bin/perl
#============================================================= -*-perl-*-
#
# BackupPC_fixLinks.pl: Identify and correct duplicate pool entries
#                       and missing links to pool
#
# DESCRIPTION
#   See below for detailed description of what it does and how it works
#   
# AUTHOR
#   Jeff Kosowsky
#
# COPYRIGHT
#   Copyright (C) 2008, 2009, 2010  Jeff Kosowsky
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#========================================================================
#
# Version 0.3.1, released December 2010
#
#========================================================================

use strict;
use warnings;
use File::Path;
use File::Find;
#use File::Compare;
use Getopt::Std;
use Fcntl;  #Required for RW I/O masks
use Digest::MD5;  #Used below to compute pool file digests

use lib "/usr/share/BackupPC/lib";
use BackupPC::FileZIO;
use BackupPC::Lib;
use BackupPC::jLib;
use BackupPC::Attrib qw(:all);

no  utf8;

die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
%Conf   = $bpc->Conf(); #Global variable defined in jLib.pm (do not use 'my')

my %opts;
if ( !getopts("i:l:fb:Vdsqvch", \%opts) || @ARGV > 0 || $opts{h} ||
	 ($opts{i} && $opts{l})) {
    print STDERR <<EOF;
usage: $0 [options]

  First, find duplicate entries in the pool.
  Then, search through backup tree to find links to dups. Also, look for
  (non-zero) files that are not linked to the pool (only 1 link).
  Optionally, relink dups and unlinked files (does not affect the pool)
  Optionally, run BackupPC_nightly to clean up the pool.

  Note: you may want to run BackupPC_nightly also before running this to make
  sure there are no holes in the pool (although this shouldn't happen...)

  Options:

    -i <inode file>  Read pool dups from file and proceed with 2nd pc tree pass
    -l <link file>   Read pool dups & bad pc links from file and proceed
                     with final repair pass
                     NOTE: -i and -l options are mutually exclusive. 
    -s               Skip first pass of generating (or tabulating if
                     -i or -l options are set) cpool dups
    -f               Fix links
    -c               Clean up pool - schedule BackupPC_nightly to run 
                     (requires server running)
    -b <path>        Search backups from <path> (relative to TopDir/pc)
    -V               Verify links of all files in pc path (WARNING: slow!)
    -d               Dry-run
    -q               Quiet - only print summaries & results
    -v               Verbose - print details on each relink
    -h               Print this usage message

EOF
exit(1);
}
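# Illustrative invocations (a sketch only; "myhost", the backup number and the
# output file name are hypothetical -- adjust to your installation):
#   BackupPC_fixLinks.pl > dups_links.txt        # scan everything, saving the report
#   BackupPC_fixLinks.pl -b myhost/123 -f        # find and fix links in one backup only
#   BackupPC_fixLinks.pl -l dups_links.txt -f -c # fix using a saved report, then
#                                                # schedule BackupPC_nightly to clean the pool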
my $file = ($opts{i} ? $opts{i} : $opts{l});
my $verifypc=$opts{V};
my $notquiet =!$opts{q};
my $verbose=$opts{v};
$dryrun = $opts{d}; #global variable in jLib.pm
my $fixlinks = $opts{f};
my $runnightly = $opts{c};
#$dryrun =1; #JJK - for testing force to always dry run
my $DRYRUN = ($dryrun == 0 ? "" : " DRY-RUN");
########################

my $md5 = Digest::MD5->new;


my $MaxLinks = $Conf{HardLinkMax};
my $cmprsslvl;
#Note we get rid of any extra lurking double slashes and any trailing slash for directories
(my $TopDir = $bpc->TopDir()) =~ s|//*|/|g; $TopDir =~ s|/$||;
(my $pooldir = $bpc->{PoolDir}) =~ s|//*|/|g; $pooldir =~ s|/$||;  
(my $cpooldir = $bpc->{CPoolDir}) =~ s|//*|/|g; $cpooldir =~ s|/$||;  
chdir($TopDir); #Do this because 'find' will later try to return to the working
                #directory, which may not be accessible if you are su backuppc

my $pc = "${TopDir}/pc";
my @backups;
if ($opts{b}) {
	(my $backups = "$pc/$opts{b}") =~ s|//*|/|g; $backups =~ s|/$||;
	die "ERROR: '$backups' directory doesn't exist\n" unless -d $backups;
	@backups = ($backups =~ m|^($pc/[^/]+)/?$| ? glob("$1/[0-9]*") : ($backups));
	# If path stops at host, then glob for all backup numbers.
}
else { # Look at all backups - begin 2 levels down i.e. in: TopDir/pc/<host>/<nn>
	@backups = glob("$pc/*/[0-9]*");
}

my %md5sumhash;  #Hash used to store previously seen full file md5sums for NewFiles
my (%inodHOA);
# First find and create hash of arrays of duplicated pool entries:
#  %inodHOA = (
#          <duplicated inode> => [ <name of equivalent parent> , <name of duplicate>, <pool/cpool>, <checksum>, <num links>, <size>],
#          ...
#          <duplicated inode> => [ <name of equivalent parent> , <name of duplicate>, <pool/cpool>, <checksum>, <num links>, <size>],
#          <duplicated inode> => [ <name of equivalent parent> , <name of duplicate>, <pool/cpool>, <checksum>, <num links>, <size>],
#       );
# where checksum = [=-#x@]<first byte of dup><first byte of parent>
#   = if files match
#   - if only decompressed versions match
#   # if only decompressed versions match (and flipped)
#   x if newlink/badlink
#   @ if same inode

my @MatchA;
# @MatchA = (<matchname>, <inoM>, <md5sum>, <dupmd5|matchtype>, <pool>, <comparflg><matchbyte><md5sumbyte>, <nlink1M>, <sizeM>)
# where:
#
#  matchname = File name and partial path (beginning after 'pc') to
#              the match in the pc tree. Note when we print it to a
#              file we enclose it in double-quotes "<matchname>"
#
#  inoM      = Inode of the match
#
#  md5sum    = Name of pool entry that has the same (uncompressed)
#              contents as matchname. The name equals the md5sum of
#              the (uncompressed) file plus potentially an _NNN suffix
#              if the data matches something other than the stem
#              md5sum in the pool (or equals all zeros if sum is not
#              calculable for some reason - shouldn't happen).
#              This is the target that we want to link matchname to
#
#  dupmd5    = Name of duplicate pool entry (which is again the md5sum
#              of the contents plus potentially an _NNN suffix). We
#              don't actually need to modify this file. We just unlink
#              all the backup files that share its inode and then let
#              BackupPC nightly delete it when it has no more other
#              links.
#
# matchtype = One of the following
#                NewLink  = if match has only one (hard) link but matches
#                           an existing pool element
#                NewFile  = if match has only one (hard) link but doesn't
#                           match an existing pool element
#                MD5Error = if for some reason couldn't calculate MD5sum
#                           (this shouldn't happen)
#
#  pool     =    pool/cpool
#
#  comparflg = Flag showing how the match and the target compare
#                 @ if this is a duplicate pool element with the SAME inode
#                   as its parent (i.e. as 'md5sum') -- shouldn't happen
#                 = if 'matchname' has the same contents as 'md5sum'
#                 - if 'matchname' inflates (i.e. uncompresses) to the same
#                   contents as 'md5sum' (this typically happens when 'md5sum'
#                   has a checksum seed and 'matchname' doesn't)
#                 # if 'dupmd5' inflates (i.e. uncompresses) to the same
#                   contents as 'md5sum' but this time 'dupmd5' has the
#                   checksum seed (and the parent, which now has a lower
#                   suffix, doesn't). For pool dups, this is the reverse case
#                   of '-'. Not applicable for NewLinks and NewFiles.
#                 x if MD5Error or if this is the first NewFile with these
#                   contents (and corresponding md5sum)
#                 y if NewFile but a previous NewFile already has these
#                   contents (and corresponding md5sum)
#
# matchbyte  = First byte of the matched file (or dup pool element)
# md5sumbyte = First byte of the corresponding (parent) pool entry that we
#              will be linking to
#                = d6 or d7 if file is compressed and checksum seed present
#                = 78 if file is compressed and checksum seed NOT present
#                = 00 for the not-yet-existent match for a NewFile
#   nlink1M    = Number of links to the match MINUS 1
#   sizeM      = Size of the match in bytes
#
#   Note for matches corresponding to duplicate pool elements, by design:
#   MatchA = (<matchname>, $inoM, @{$inodHOA{$inoM}})
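#   Illustrative printed line for a NewLink (hypothetical host, mangled file
#   name and digest; matchname is enclosed in double quotes as noted above):
#   "myhost/123/f%2fetc/fpasswd" 345678 5d41402abc4b2a76b9719d911017c592 NewLink cpool -78d6 1 1024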

my ($totdups, $collisions, $totlinks, $totsize) = (0, 0, 0, 0);
my ($totmatches, $totmd5errs, $totunlinked, $totnewfiles, $totnewlinks, $totfixed, $totbroken)
	= (0, 0, 0, 0, 0, 0, 0);

# Find or read-in list of duplicate pool entries
if (!$opts{s}) {  # Read in or find duplicate pool entries
	if ($opts{i} || $opts{l}) { #Read in and tabulate previously generated list of inodes from input file (note link entries will be ignored if they exist)
		read_inodHOA($file);
		print_inodHOA() if $notquiet;
	}
	else{ # Find inodes by recursing through the pool
		find(\&pool_dups, $pooldir, $cpooldir); 
	}
	print "Found $totdups dups (and $collisions true collisions) with $totlinks total links and $totsize size\n";
}

# Find backup files with broken/missing links or with links to duplicate pool entries
if ($opts{l}) { # Read in previously generated list of inodes & optionally start fixing links & duplicate pool entries if -f flag set
	read_LinkFile($file);
}
else { #Find bad links in pc path and optionally fix together with duplicate pool nodes if -f flag set
	foreach my $backup (@backups) {
		$backup =~ m#^($pc/[^/]*/[^/]*)#;
		$cmprsslvl = get_bakinfo($1, "compress"); #Note this is set at the level of the backup number
		$cmprsslvl = $Conf{CompressLevel} unless defined($cmprsslvl);
		print "Finding links in $backup\n";
		find(\&find_BadOrMissingLinks, $backup);
	}
}
$totunlinked = $totnewlinks + $totnewfiles;
print "Found $totmatches matching files and $totunlinked unlinked files ($totnewfiles NewFiles, $totnewlinks NewLinks, $totmd5errs MD5Errors)\n";
print "Fixed $totfixed out of $totbroken links\n" if $fixlinks;
run_nightly() if (!$dryrun && $runnightly);
print "DONE\n";
exit;

#####################################################################################################
sub pool_dups {
	my ($devD, $inoD, $modeD, $nlinkD, $uidD, $gidD, $rdevD, $sizeD, $therestD);
	my ($devP, $inoP, $modeP, $nlinkP, $uidP, $gidP, $rdevP, $sizeP, $therestP);
	my $comparflg;

	unless (-r) {  # First check for read error on found element
		warnerr "Can't read : $File::Find::name\n";
		return;
	}
	# Then get root/suffix and check if it is a potential duplicate
	return unless -f && m|(.*)_(.*)|; # file doesn't end with _<num>
	my $root=$1;
	my $suffix=$2;
	my $dup=$_;
	$File::Find::dir =~ m|(c?pool)/[/[:xdigit:]]+$|;
	my $thepool = $1;

	# Then get file information
	unless (($devD, $inoD, $modeD, $nlinkD, $uidD, $gidD, $rdevD, $sizeD, $therestD) 
			= stat($dup)) {
		warnerr "Can't stat: $File::Find::name\n";
		return;
	}
	my $prevsuffix = ($suffix == 0 ? '' : '_' . ($suffix -1));
	warnerr "Hole in pool chain at $root$prevsuffix" unless -f "$root$prevsuffix";

	# Then check to see if any of its "parents" are duplicates

	my $parent = $root;
	for (my $i=-1; $i <  $suffix; $i++, $parent="$root\_$i" ) { 
        #Start at base of chain and move up (note start with -1 for root)
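        #e.g. (illustrative digest) for dup '5d41..._2' this checks '5d41...',
        #then '5d41..._0' and '5d41..._1' in turn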
		unless( -f $parent ) {
			warnerr "Parent not a file or unreadable: $File::Find::dir/$parent\n";
			next;
		}
		($devP, $inoP, $modeP, $nlinkP, $uidP, $gidP, $rdevP, $sizeP, $therestP) = stat($parent);
		if ($inoP == $inoD) { #same inodes
			$comparflg='@';
		}
		elsif (($nlinkP + $nlinkD) >= $MaxLinks) {
			next; # Too many links even if files the same
		}
		elsif ( ($comparflg = compare_files($parent,$dup, ($thepool eq "cpool" ? 1 :0))) > 0 ) { #Found match
			$comparflg = ($comparflg == 1 ? '=' : '-');
		}
		else { next; } # Parent is not a copy
		my $fbyteD = firstbyte("$File::Find::dir/$dup");
		my $fbyteP = firstbyte("$File::Find::dir/$parent");
		if(($fbyteD eq 'd6' || $fbyteD eq 'd7') && 
		   !($fbyteP eq 'd6' || $fbyteP eq 'd7'))
		  #NOTE: compressed file without checksums starts with 0x78
		  #      compressed file with checksums starts with 0xd6 or 0xd7
		{  #swap $dup & $parent if only $dup has rsync seed
			my $temp = $dup; $dup = $parent; $parent = $temp;
			$temp = $fbyteD; $fbyteD = $fbyteP;	$fbyteP = $temp;
			$nlinkD = $nlinkP; $sizeD = $sizeP;
			$comparflg='#';
		}
		$inodHOA{$inoD} = [$parent, $dup, $thepool, $comparflg.$fbyteD.$fbyteP, --$nlinkD, $sizeD];
		print "$inoD @{ $inodHOA{$inoD} }\n" if $notquiet;
#		print "$inoD $parent $dup $thepool $comparflg, $nlinkD $sizeD\n";
		$totdups++;
		$totlinks += $nlinkD;
		$totsize += $sizeD;
		return;  #Earliest duplicate checksum (i.e. parent) in the chain found so stop going down chain
	}
	# No matching copies found in the chain
	print "$inoD $dup COLLISION $thepool X $nlinkD $sizeD\n" if $notquiet;
	$collisions++;
}

sub print_inodHOA {
	for my $inode (keys %inodHOA) {
		print "$inode @{ $inodHOA{$inode} }\n";
#		print "$inodHOA{$inode}[0] $inodHOA{$inode}[1] etc...\n";
	}
}

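# Reads a file of pool-dup lines in the format pool_dups prints above:
#   <inode> <parent md5sum> <dup md5sum> <pool|cpool> <flag+bytes> <nlinks> <size>
# e.g. (hypothetical values):
#   123456 5d41402abc4b2a76b9719d911017c592 5d41402abc4b2a76b9719d911017c592_0 cpool =d6d6 4 18231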
sub read_inodHOA {
	my $file=$_[0];
	$totdups = $collisions = $totlinks = $totsize = 0;
	die "Error: file not readable: $file\n" unless -f $file && -r $file;
	open(IN,$file) || die "Can't open $file for reading";
	while(<IN>) {
		m|^(\d+)\s+([[:xdigit:]]+(_\d+)?)\s+([[:xdigit:]]+(_\d+)?)\s+(c?pool)\s+([-=#@][[:xdigit:]]+)\s+(\d+)\s+(\d+)| || next;
		$inodHOA{$1} = [$2, $4, $6, $7, $8, $9]; 
		$totdups++;
		$totsize += $9;
		$totlinks += $8;
	}
}
		
sub find_BadOrMissingLinks {
	my $fixed ='';
	unless (-r) {  # First check for read error on found element
		warnerr "Can't read : $File::Find::name\n";
		return;
	}
	return unless -f; #Not a file
	return unless m|^f| || m|^attrib$|; # Skip files that are neither mangled ('f' prefix) nor attrib files
	my $matchtype= BadOrMissingLinks($File::Find::name);
	return if $matchtype < 0;
	if($fixlinks && $matchtype > 0) {
		$totbroken++;
		if(fix_links($matchtype) > 0) { #Go fix link...
			$totfixed++;
			$fixed=" FIXED$DRYRUN";
		}
		else {$fixed=" BROKEN$DRYRUN";}
	}
	if ($notquiet) {
		my $name = shift(@MatchA);
		print "\"" . $name . "\" " . join(" ", @MatchA) . "$fixed\n";
	}
}

# Return -1 if no problem detected with link
# Return -2 if can't stat file (shouldn't happen)
# Return 0 if MD5Error - shouldn't happen
# Return 1 if links to pool dup in %inodHOA
# Return 2 if no links to pool but matching pool entry found (NewLink)
# Return 3 if no links to pool and no matching pool entry found (NewFile-x)
# Return 4 if no links to pool and no matching pool entry, but contents match a
#          previously seen NewFile (NewFile-y)
sub BadOrMissingLinks {
	my $matchpath = $_[0];
	(my $matchname = $matchpath) =~ s|^$pc/*||; # Delete leading path directories (up to machine)

	my $rettype;
	my $matchtype;
	my ($devM, $inoM, $modeM, $nlinkM, $uidM, $gidM, $rdevM, $sizeM, $therestM);

	unless (($devM, $inoM, $modeM, $nlinkM, $uidM, $gidM, $rdevM, $sizeM, $therestM)
			= stat($_)) {
		warnerr "Can't stat: $matchpath\n";
		return -2; #This really shouldn't happen!
	}
	if (exists $inodHOA{$inoM}) { #File links to dup pool element in our list
		@MatchA = ($matchname, $inoM, @{$inodHOA{$inoM}});
#		print "\"$matchname\" $inoM @{ $inodHOA{$inoM} }\n";
		$totmatches++;
		return 1;  #type=1
	}
	elsif($sizeM == 0 || ($nlinkM > 1 && !$verifypc)){
		return -1; #Zero-length file, or already multiply-linked (presumed linked to pool)
	}
	else {
		my $matchbyte = firstbyte($matchpath);
		my $comparflg = 'x';  # Default if no link to pool
		my $matchtype = "NewFile"; # Default if no link to pool
		my $md5sumbyte = '00'; # Default if no link to pool
		my $thepool = ($cmprsslvl > 0 ? "cpool" : "pool");
		my $thepooldir = ($cmprsslvl > 0 ? $cpooldir : $pooldir);
		my $md5sum = zFile2MD5($bpc, $md5, $matchpath, 0, $cmprsslvl);
		if ($md5sum eq "-1") { #Can't create MD5sum
			$md5sum = "00000000000000000000000000000000";
			$matchtype = "MD5Error";
			$totmd5errs++;
			$rettype=0;
			goto match_return;
		}
		my $md5sumpathbase = $bpc->MD52Path($md5sum, 0, $thepooldir);
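		#MD52Path fans the pool out three directory levels deep using the first
		#hex digits of the digest, e.g. (illustrative) .../cpool/5/d/4/5d41402a...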
		my $i;
		if($verifypc) {
			for ($i=-1, my $md5sumpath = $md5sumpathbase; 
				 -f $md5sumpath; $md5sumpath = $md5sumpathbase . '_' . ++$i) {
				#Start at the root, looking for inode match in the pool...
				return -1 if($inoM ==  (stat($md5sumpath))[1]);
			}
			#Otherwise, pc file not found in pool
		}
		# Now we know we have a pc file that doesn't link to the pool...
		for ($i=-1, my $md5sumpath = $md5sumpathbase; 
			 -f $md5sumpath; $md5sumpath = $md5sumpathbase . '_' . ++$i) {
            #Again start at the root, try to find file content match in pool...
			if ((my $cmpresult = compare_files ($matchpath, $md5sumpath, $cmprsslvl)) > 0) { #Exact file match found

				my $inod =(stat($md5sumpath))[1]; #inode
				if (exists $inodHOA{$inod}) { #Oops target set to be relinked
					$md5sum = $inodHOA{$inod}[0]; # Set to parent
					$md5sumpath =$bpc->MD52Path($md5sum, 0, $thepooldir);
					$cmpresult = compare_files($matchpath,$md5sumpath, $cmprsslvl);
				$|++; warn "Note: NewLink target is also a duplicate pool entry - relinking to its parent instead\n";
				}
				else {
					($md5sum .= '_' . $i) if $i >= 0;

				}
				$comparflg = ($cmpresult == 1 ? '=' : '-');
				$md5sumbyte = firstbyte($md5sumpath);
				$matchtype = "NewLink";
				$totnewlinks++;
				$rettype=2; #NewLink
				goto match_return;
			} #Otherwise, continue up the chain looking for a pool match...
		}
		$totnewfiles++; #Otherwise must be a NewFile since not found in pool
		my $fullmd5sum = zFile2FullMD5($bpc, $md5, $matchpath, $cmprsslvl);
		($md5sum .= '_' . $i) if $i >= 0;  # Name of first empty pool slot
		if ($md5sumhash{$fullmd5sum}) {   #Already seen before!
			$comparflg = 'y';
			$md5sum = $md5sumhash{$fullmd5sum};
			$rettype=4; #NewFile-y
		}
		else {
			$md5sumhash{$fullmd5sum} = $md5sum;
			$rettype=3; #NewFile-x
		}

	  match_return:
		@MatchA = ($matchname, $inoM, $md5sum, $matchtype, $thepool, ${comparflg}.${matchbyte}.${md5sumbyte}, $nlinkM, $sizeM);
#		print "\"$matchname\" $inoM $md5sum $matchtype $thepool ${comparflg}${matchbyte}${md5sumbyte} $nlinkM $sizeM\n";
		return $rettype;
	}
}

#Read in link file for matching pool md5sums (dups), NewFiles, and NewLinks; don't read in MD5Error entries or other errors
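#Expected line formats match what find_BadOrMissingLinks prints above, e.g.
#(hypothetical host/file names; digests abbreviated here as <md5sum>/<dupmd5>):
#  "myhost/123/f%2fetc/fpasswd" 345678 <md5sum> <dupmd5> cpool =d6d6 3 1024
#  "myhost/123/f%2fetc/fhosts" 345679 <md5sum> NewLink cpool -78d6 1 512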
sub read_LinkFile {
	my $file=$_[0];
	my $matchtype;
	my $fixed='';
	die "Error: file not readable: $file\n" unless -f $file && -r $file;
	open(IN,$file) || die "Can't open $file for reading";
	while(<IN>) {
		$matchtype = read_match($_);
		++$totmatches if $matchtype==1;
		++$totnewlinks if $matchtype==2 || $matchtype==4;
		++$totnewfiles if $matchtype==3;
		if($fixlinks && $matchtype > 0) {
			$totbroken++;
			if (fix_links($matchtype) > 0) {
				$totfixed++;
				$fixed=" FIXED$DRYRUN";
			}
			else {$fixed=" BROKEN$DRYRUN";}
		}
		my $name = shift(@MatchA);
		print "\"" . $name . "\" " . join(" ", @MatchA) . "$fixed\n" 
			if $matchtype >= 0 && $notquiet;
	}
}

sub read_match {
	my $ret=-1;
	if (m|^"(.*)"\s+(\d+)\s+([[:xdigit:]]+(_\d+)?)\s+([[:xdigit:]]+(_\d+)?)\s+(c?pool)\s+([-=#@][[:xdigit:]]+)\s+(\d+)\s+(\d+)|) {
		$ret=1; #Dup match:  Link to dup node in pool
	}
	elsif (m|^"(.*)"\s+(\d+)\s+([[:xdigit:]]+(_\d+)?)\s+((NewLink))\s+(c?pool)\s+([-=][[:xdigit:]]+)\s+(\d+)\s+(\d+)|) {
		$ret=2; #NewLink: File without links but has matching pool entry (Note parentheses added to keep numbering the same)
	}
	elsif (m|^"(.*)"\s+(\d+)\s+([[:xdigit:]]+(_\d+)?)\s+((NewFile))\s+(c?pool)\s+(x[[:xdigit:]]+)\s+(\d+)\s+(\d+)|) {
		$ret=3; #NewFile-x: File without links and without existing matching pool entry and without a previous NewFile
		        #with the same content (Note parentheses added to keep numbering the same)
	}
	elsif (m|^"(.*)"\s+(\d+)\s+([[:xdigit:]]+(_\d+)?)\s+((NewFile))\s+(c?pool)\s+(y[[:xdigit:]]+)\s+(\d+)\s+(\d+)|) {
		$ret=4; #NewFile-y: File without links and without existing  matching pool entry but a previous NewFile with the same
		        #content will previously have created the new pool entry (Note parentheses added to keep numbering the same)
	}
	else {return -1;}
	@MatchA = ( $1, $2, $3, $5, $7, $8, $9, $10);
	return $ret;
}

sub fix_links {
	my ($type) = @_;
	my ($matchname, $inoM, $md5sum, $matchtype, $thepool, $checksumbytes, $nlinkM, $sizeM) = @MatchA;
	$checksumbytes =~ m|^(.)(..)(..)$|;
	my $cmprflag = $1;
	my $matchbyte = $2;
	my $md5sumbyte = $3;
	my $md5sumpath = $bpc->MD52Path($md5sum, 0, ($thepool eq "cpool" ? $cpooldir : $pooldir));
	my $matchpath = "$pc/$matchname";
	my $compress = ($thepool eq "cpool" ? 1 : 0);

	#First, perform extra checks (should be unnecessary, but I'm paranoid)
	unless (-r $matchpath) {
		warnerr "\"$matchpath\" - Can't read file\n";
		return -1;
	}
	my ($devMM, $inoMM, $modeMM, $nlinkMM, $uidMM, $gidMM, $rdevMM, $sizeMM, $therestMM) = stat($matchpath);
	if ($inoM != $inoMM || $sizeM != $sizeMM) {
		warnerr "\"$matchpath\" - Something changed... Inode or size doesn't match previous\n";
		return -1;
	}

	$type = 3 if $dryrun && $type == 4;  # For dry-run, NewFile-y behaves like NewFile-x since the link is not created
	if (($type == 1 && $matchtype =~ m|^[[:xdigit:]]+(_\d+)?$|)  || #Duplicate pool entry
		($type == 2 && $matchtype =~ m|^NewLink$|) ||  #New Link
		($type == 4 && $matchtype =~ m|^NewFile$|)) {  #New File with previously created link (by previous NewFile)
		# Unlink $matchname and relink to $md5sum

		unless ( -r $md5sumpath) {
			warnerr "\"$matchname\" - Can't read new link target: \"$md5sum\"\n";
			return -1;
		} 

		my ($devP, $inoP, $modeP, $nlinkP, $uidP, $gidP, $rdevP, $sizeP, $therestP) = stat($md5sumpath);
		if (($nlinkP + 1) >= $MaxLinks) { 
			$|++; warn "Warn: \"$matchname\" - Linking would exceed HardLinkMax for  \"$md5sum\"\n";
			return -1; #Note: this still leaves everything OK, since the file is still linked to the pool;
			           #it just means we can't free up an extra pool entry (which should rarely happen
			           #anyway since we have already checked this earlier)
		}
		if(compare_files($matchpath, $md5sumpath, $compress) <= 0) {
			warnerr "\"$matchname\" - contents don't match \"$md5sum\"\n";
			return -1;
		}

		if(!junlink($matchpath)){
			warnerr "\"$matchname\" - unlink failed\n";
			return -1;	
		}
	    if(!jlink($md5sumpath, $matchpath)){
			warnerr "\"$matchname\" - link from \"$md5sum\" failed\n";
			return -1;
			}
		print "\"$matchname\" successfully (re)linked from $matchtype [$inoM] to $md5sum [$inoP]" if $verbose;
		return 1;
	}
	elsif ($type == 3 && $matchtype =~ m|^NewFile$|) {  #New File
		# Make new link in pool directory, adding additional subdirectories as needed
		if ( -r $md5sumpath) {  # Check to see if something else took the planned target
			warnerr "\"$matchname\" - target already exists: \"$md5sum\"\n";
			return -1;
		} 
		$md5sum =~ m|^([[:xdigit:]]+)|; # Strip off the suffix
		unless (zFile2MD5($bpc, $md5, $matchpath, 0, $compress) eq $1) {
			warnerr "\"$matchname\" - md5sum doesn't match \"$md5sum\"\n";
			return -1;
		}
		$md5sumpath =~ m|(.*)/|;  # Find the containing directory
		print "\"$matchname\" - Making new pool directory $1\n" if ($verbose && ! -d $1);
		jmkpath($1, 0, 0777) if (!-d $1);
	    if (!jlink($matchpath, $md5sumpath)){ # Note reverse order of link from types 1&2
			warnerr "\"$matchname\" - link to \"$md5sum\" failed\n";
			return -1;
		}
		print "\"$matchname\" successfully linked to new file $md5sum [$inoM]" if $verbose;
		return 1;
	}
	else {
		warnerr "Invalid type ($type) doesn't match $matchtype\n";
		return -1;
	}
}

sub compare_files
{
	my ($file1, $file2, $compress)=@_;
	return 1 if !jcompare($file1, $file2);  #Matches as-is
	return 2 if $compress && !zcompare($file1, $file2, $compress);  #Matches post-inflation
	return 0; # Not a match or error
}

#Simple wrappers to protect when just doing dry runs
sub jlink
{
	return 1 if $dryrun;
	link $_[0], $_[1];
}

sub junlink
{
	return 1 if $dryrun;
	unlink @_;
}

sub jmkpath
{
	return 1 if $dryrun;
	mkpath $_[0], $_[1], $_[2];
}
