Package: perforate Version: 1.0-17 Severity: wishlist Tags: patch I rewrite finddup in perl to fix many issues. This will also close the bugs #222030, #263779 and #289911.
The new script also replace nodup (see finddup --man) -- System Information: Debian Release: 3.1 APT prefers unstable APT policy: (800, 'unstable'), (700, 'testing') Architecture: i386 (i686) Kernel: Linux 2.4.29 Locale: LANG=de_DE, LC_CTYPE=de_DE (charmap=ISO-8859-1) (ignored: LC_ALL set to de_DE) Versions of packages perforate depends on: ii libc6 2.3.2.ds1-20 GNU C Library: Shared libraries an -- no debconf information -- Klaus Ethgen http://www.ethgen.de/ pub 2048R/D1A4EDE5 2000-02-26 Klaus Ethgen <[EMAIL PROTECTED]> Fingerprint: D7 67 71 C4 99 A6 D4 FE EA 40 30 57 3C 88 26 2B
#! /usr/bin/perl
#
# finddup 2.0 - find identical files and do somethink with it.
#
use strict;
use warnings;
use File::Find ();
use Digest::MD5;
use Getopt::Long;
use Pod::Usage;
# for the convenience of &wanted calls, including -eval statements:
use vars qw(*name *dir *prune);
*name = *File::Find::name;
*dir = *File::Find::dir;
*prune = *File::Find::prune;
use vars qw($RCS_VERSION $VERSION $opt %filelist %md5list);
sub wanted;
$RCS_VERSION = '$Id: finddup,v 2.1 2005/02/05 18:43:11 klaus Exp $';
($VERSION = '$Revision: 2.1 $') =~ s/^\D*([\d.]*)\D*$/$1/;
GetOptions($opt = {}, qw(help|h man version noaction|n verbose|v quiet|q link|l
oldresult|o)) || pod2usage 2;
pod2usage(1) if $opt->{help};
pod2usage(-exitstatus => 0, -verbose => 2) if $opt->{man};
if ($opt->{version}) { print "Version: $VERSION\n"; exit 0; }
# Force some options
$opt->{verbose} = 1 if not exists $opt->{verbose} and $opt->{noaction};
$opt->{link} = 1 if not exists $opt->{link} and $0 =~ /^(.*\/)?nodup(.pl)?$/;
$opt->{oldresult} = 1 if not exists $opt->{oldresult} and $0 =~
/^(.*\/)?nodup(.pl)?$/;
if ($opt->{oldresult})
{
while (<>)
{
chomp;
s/^(\d+) '//;
my $size = $1;
s/'$//;
my @files = split(/' '/);
open IN, "<", $files[0];
my $md5 = Digest::MD5->new->addfile(*IN)->hexdigest;
close IN;
$md5list{$md5} = [[$size, [EMAIL PROTECTED];
} # while (<>)
} # if ($opt->{oldresult})
else
{
# Traverse desired filesystems
File::Find::find({wanted => \&wanted}, '.');
# Now calculate all md5sums. Afterwards %filelist can be freed.
foreach (sort {$a->[1]->[0] cmp $b->[1]->[0]} values(%filelist))
{
open IN, "<", $_->[1]->[0];
my $md5 = Digest::MD5->new->addfile(*IN)->hexdigest;
close IN;
$md5list{$md5} = [] unless exists $md5list{$md5};
push @{$md5list{$md5}}, $_;
}
%filelist = ();
} # if ($opt->{oldresult}) { ... }...
# Now we can output doubles sorted by size
foreach (sort {$md5list{$b}->[0]->[0] <=> $md5list{$a}->[0]->[0]}
keys(%md5list))
{
next unless @{$md5list{$_}} > 1; # This file is single
my $size = $md5list{$_}->[0]->[0];
my $reffile = $md5list{$_}->[0]->[1]->[0];
if ($size) # Do not output empty files
{
if ($opt->{link})
{
print "L�nge: $size Files:\t$reffile\n" if $opt->{verbose};
foreach (@{$md5list{$_}})
{
foreach (@{$_->[1]})
{
print "\t\t\t$_\n" if $opt->{verbose};
unless ($opt->{noaction})
{
unlink $_ || die "Fehler beim L�schen von '$_'";
link $reffile, $_ || die "Fehler beim ln '$reffile' '$_'";
}
}
}
print "\n" if $opt->{verbose};
} # if ($opt->{link})
else
{
print "$size" unless $opt->{quiet};
foreach (@{$md5list{$_}})
{
foreach (@{$_->[1]})
{
print " '$_'" unless $opt->{quiet};
}
}
print "\n" unless $opt->{quiet};
} # if ($opt->{link}) { ... } else
} # if ($size) # Do not output emp...
} # foreach (keys(%md5list))
exit 0;
sub wanted
{
my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size);
if ((($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size) = lstat($_)) &&
!($File::Find::prune |= ($dev != $File::Find::topdev)) && -f _)
{
$filelist{$ino} = [$size, []] unless exists $filelist{$ino};
push @{$filelist{$ino}->[1]}, $name;
}
}
__END__
=head1 NAME
finddup - Find identical files and do somethink with it
=head1 SYNOPSIS
B<finddup> [I<options>...]
--man the manpage
-h, --help a short help
--version the version (CVS) of the program
-n, --noaction do just nothing, just print out (implies -v)
-v, --verbose just what the name says
-q, --quiet be quiet
-l, --link link the identical files together
-o, --oldresult Use the old output of this script
=head1 DESCRIPTION
finddup search the working directory and all files below on the same partition
for duplicate files.
finddup can optional hardlink such files to save space.
Files size 0 will not be reported or hardlinked as this might give problemes
later.
This is a complete rewrite of the finddup in perl to handle several issues:
=over
=item
Allow spaces and other characters in filenames
=item
be faster
=item
include nodup in same script
=item
Handle if the files allready have other hardlinks to it in the same tree
=item
Several improbvements
=back
If started as nodup or nodup.pl the script will act like started with optiones
--link and
--oldresult
=head1 COPYRIGHT
Copyright (c) 2005 by Klaus Ethgen. All rights reserved.
=head1 LICENSE
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
=head1 AUTHOR
S<Klaus Ethgen E<lt>[EMAIL PROTECTED]<gt>>
=head1 HISTORY
$Log: finddup,v $
Revision 2.1 2005/02/05 18:43:11 klaus
Just cosmetic
Revision 2.0 2005/02/05 18:41:20 klaus
Completely new version
=cut
signature.asc
Description: Digital signature

