--- Begin Message ---
Package: fdupes
Version: 1.50-PR2-3
Source: fdupes
Tags: patch
Salut!
I've been hacking away at fdupes for the last few days to scratch my own itches; the extra functionality I originally wanted is now
mostly in and working. When I find the time, I will continue cleaning up the current code, perhaps implement one or two other
wishlist items, and also clean up the documentation. For now, this patch is what emerged, and I thought it better to forward it ASAP to prevent
bitrot, and also to get feedback or testing. It is to be applied after fdupes_1.50-PR2-3.diff. The changes are as follows:
- prettified and unified printing of file sizes
- printing of file modification date
- optional printing of checksum
- empty files now excluded by default
- always print summary at top
- customizable small size limit for files to be excluded
(I want to see ZIPs, PDFs and ISOs, not HTML, EXE or BAK files => --atleast=0.6)
- instead of typing 'all', entering 'a' or just hitting Enter
will also preserve all files, i.e. skip on to the next set
- a PROGRESSBAR!!!11!1! so fun watching it crawl starboard
- misc code beautification
what remains to be done:
- updating/reworking of options and documentation
- result sorting
- ncurses interface?
Well, at least from my testing, no bugs introduced. And yes, it's laughable - but the --showcrc option makes me much more
comfortable killing files ^^
So here it is, have a look and please report back any problems.
regards,
marcel (happy gento0er AND debianaut btw ;D)
--- fdupes.c.orig 2011-02-04 17:28:19.599253312 +0100
+++ fdupes.c 2011-02-07 15:43:38.643146444 +0100
@@ -23,6 +23,7 @@
#include <stdarg.h>
#include <string.h>
#include <sys/stat.h>
+#include <sys/ioctl.h>
#include <dirent.h>
#include <unistd.h>
#include <stdlib.h>
@@ -30,6 +31,7 @@
#include <getopt.h>
#endif
#include <string.h>
+#include <time.h>
#include <errno.h>
#include <libgen.h>
@@ -45,21 +47,24 @@
#define F_DSAMELINE 0x0004
#define F_FOLLOWLINKS 0x0008
#define F_DELETEFILES 0x0010
-#define F_EXCLUDEEMPTY 0x0020
-#define F_CONSIDERHARDLINKS 0x0040
-#define F_SHOWSIZE 0x0080
-#define F_OMITFIRST 0x0100
-#define F_RECURSEAFTER 0x0200
-#define F_NOPROMPT 0x0400
-#define F_SUMMARIZEMATCHES 0x0800
-#define F_EXCLUDEHIDDEN 0x1000
-#define F_HARDLINKFILES 0x2000
-#define F_DEBUGINFO 0x4000
+#define F_INCLUDEEMPTY 0x0020
+#define F_EXCLUDESMALL 0x0040
+#define F_EXCLUDEHIDDEN 0x0080
+#define F_CONSIDERHARDLINKS 0x0100
+#define F_SHOWCRC 0x0200
+#define F_OMITFIRST 0x0400
+#define F_RECURSEAFTER 0x0800
+#define F_NOPROMPT 0x1000
+#define F_SUMMARIZEONLY 0x2000
+#define F_HARDLINKFILES 0x4000
+#define F_DEBUGINFO 0x8000
char *program_name;
unsigned long flags = 0;
+float minsize = 1;
+
#define CHUNK_SIZE 8192
#define INPUT_SIZE 256
@@ -145,6 +150,30 @@
}
}
+char *formatbytesize(off_t size)
+{ /* Render a byte count as "N Byte(s)" or as Kibi-/Mebi-/Gibibytes with two decimals. */
+  static char sizestr[64]; /* returned to the caller; overwritten by the next call */
+  /* off_t is signed and may be wider than size_t, so print it as long long, not with %zu */
+  if (size < 2) snprintf(sizestr, sizeof(sizestr), "%lld Byte", (long long) size);
+  else if (size < 1024) snprintf(sizestr, sizeof(sizestr), "%lld Bytes", (long long) size);
+  else if (size < 1048576) snprintf(sizestr, sizeof(sizestr), "%.2f Kibibytes", (double) size / 1024);
+  else if (size < 1073741824) snprintf(sizestr, sizeof(sizestr), "%.2f Mebibytes", (double) size / 1048576);
+  else snprintf(sizestr, sizeof(sizestr), "%.2f Gibibytes", (double) size / 1073741824);
+  return sizestr;
+}
+
+void printfiledetails(file_t *file)
+{ /* Print one line for FILE: its name, its mtime as local "YYYY-MM-DD HH:MM:SS" and, with F_SHOWCRC set, its checksum. */
+  char stamp[42] = "unknown date"; /* shown as-is if localtime() cannot convert the mtime */
+  struct tm *when = localtime(&file->mtime); /* may be NULL, so it is checked before use */
+  if (when != NULL) strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", when);
+  printf("%s (%s)", file->d_name, stamp);
+  if (ISFLAG(flags, F_SHOWCRC))
+    printf(" CRC:%s\n", file->crcsignature);
+  else
+    printf("\n");
+}
+
off_t filesize(char *filename) {
struct stat s;
@@ -237,7 +266,7 @@
{
DIR *cd;
file_t *newfile;
- struct dirent *dirinfo;
+ struct dirent *direntry;
int lastchar;
int filecount = 0;
struct stat info;
@@ -253,8 +282,8 @@
return 0;
}
- while ((dirinfo = readdir(cd)) != NULL) {
- if (strcmp(dirinfo->d_name, ".") && strcmp(dirinfo->d_name, "..")) {
+ while ((direntry = readdir(cd)) != NULL) {
+ if (strcmp(direntry->d_name, ".") && strcmp(direntry->d_name, "..")) {
if (!ISFLAG(flags, F_HIDEPROGRESS)) {
fprintf(stderr, "\rBuilding file list %c ", indicator[progress]);
progress = (progress + 1) % 4;
@@ -275,7 +304,7 @@
newfile->duplicates = NULL;
newfile->hasdupes = 0;
- newfile->d_name = (char*)malloc(strlen(dir)+strlen(dirinfo->d_name)+2);
+ newfile->d_name = (char*)malloc(strlen(dir)+strlen(direntry->d_name)+2);
if (!newfile->d_name) {
errormsg("out of memory!\n");
@@ -288,7 +317,7 @@
lastchar = strlen(dir) - 1;
if (lastchar >= 0 && dir[lastchar] != '/')
strcat(newfile->d_name, "/");
- strcat(newfile->d_name, dirinfo->d_name);
+ strcat(newfile->d_name, direntry->d_name);
if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
fullname = strdup(newfile->d_name);
@@ -301,7 +330,9 @@
free(fullname);
}
- if (filesize(newfile->d_name) == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) {
+ if ((filesize(newfile->d_name) == 0 && !ISFLAG(flags, F_INCLUDEEMPTY)) ||
+ (direntry->d_type == DT_REG && ISFLAG(flags, F_EXCLUDESMALL) &&
filesize(newfile->d_name) < (int) minsize)) {
+// printf("excluding %s size %s\n", newfile->d_name,
formatbytesize(filesize(newfile->d_name)));
free(newfile->d_name);
free(newfile);
continue;
@@ -314,6 +345,7 @@
}
if (lstat(newfile->d_name, &linfo) == -1) {
+ printf("lstat failed on %s size %s\n", newfile->d_name,
formatbytesize(filesize(newfile->d_name)));
free(newfile->d_name);
free(newfile);
continue;
@@ -646,15 +678,8 @@
if (numsets == 0)
printf("No duplicates found.\n\n");
else
- {
- if (numbytes < 1024.0)
- printf("%d duplicate files (in %d sets), occupying %.0f bytes.\n\n",
numfiles, numsets, numbytes);
- else if (numbytes <= (1000.0 * 1000.0))
- printf("%d duplicate files (in %d sets), occupying %.1f kylobytes\n\n",
numfiles, numsets, numbytes / 1000.0);
- else
- printf("%d duplicate files (in %d sets), occupying %.1f megabytes\n\n",
numfiles, numsets, numbytes / (1000.0 * 1000.0));
-
- }
+ printf("%d duplicate files (in %d sets), occupying %s.\n\n",
+ numfiles, numsets, formatbytesize(numbytes));
}
void printmatches(file_t *files)
@@ -664,21 +689,29 @@
while (files != NULL) {
if (files->hasdupes) {
if (!ISFLAG(flags, F_OMITFIRST)) {
- if (ISFLAG(flags, F_SHOWSIZE)) printf("%lld byte%seach:\n", files->size,
- (files->size != 1) ? "s " : " ");
- if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &files->d_name);
- printf("%s%c", files->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
+ printf("%s each:\n", formatbytesize(files->size));
+ if (ISFLAG(flags, F_DSAMELINE))
+ {
+ escapefilename("\\ ", &files->d_name);
+ printf("%s ", files->d_name);
+ }
+ else
+ printfiledetails(files);
}
tmpfile = files->duplicates;
while (tmpfile != NULL) {
- if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &tmpfile->d_name);
- printf("%s%c", tmpfile->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
+ if (ISFLAG(flags, F_DSAMELINE))
+ {
+ escapefilename("\\ ", &tmpfile->d_name);
+ printf("%s ", tmpfile->d_name);
+ }
+ else
+ printfiledetails(tmpfile);
tmpfile = tmpfile->duplicates;
}
printf("\n");
-
}
-
+
files = files->next;
}
}
@@ -788,25 +821,38 @@
exit(1);
}
+ printf("Hint:\n");
+ printf("The files to keep for each match set can be specified\n");
+ printf("individually by listing the corresponding numbers\n");
+ printf("delimited by spaces and commata (as in [1 2 3] or [1,4])\n");
+ printf(" - or enter [all]/[a]/[RETURN] to not delete any files.\n");
+ printf("The sets are sorted as last modified, oldest copy on top.\n\n");
while (files) {
if (files->hasdupes) {
curgroup++;
counter = 1;
dupelist[counter] = files;
- if (prompt) printf("[%d] %s\n", counter, files->d_name);
+ if (prompt) {
+ printf("[%d] ", counter);
+ printfiledetails(files);
+ }
tmpfile = files->duplicates;
while (tmpfile) {
dupelist[++counter] = tmpfile;
- if (prompt) printf("[%d] %s\n", counter, tmpfile->d_name);
+
+ if (prompt) {
+ printf("[%d] ", counter);
+ printfiledetails(tmpfile);
+ }
tmpfile = tmpfile->duplicates;
}
if (prompt) printf("\n");
- if (!prompt) /* preserve only the first file */
+ if (!prompt) /* preserve first file & delete clones */
{
preserve[1] = 1;
for (x = 2; x <= counter; x++) preserve[x] = 0;
@@ -815,11 +861,8 @@
else /* prompt for files to preserve */
do {
- printf("Set %d of %d, preserve files [1 - %d, all]",
- curgroup, groups, counter);
- if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%lld byte%seach)", files->size,
- (files->size != 1) ? "s " : " ");
- printf(": ");
+ printf("Set %d of %d (%s each), files to keep [1 - %d, all]: ",
+ curgroup, groups, formatbytesize(files->size), counter);
fflush(stdout);
fgets(preservestr, INPUT_SIZE, stdin);
@@ -843,17 +886,19 @@
for (x = 1; x <= counter; x++) preserve[x] = 0;
token = strtok(preservestr, " ,\n");
-
- while (token != NULL) {
- if (strcasecmp(token, "all") == 0)
+
+ do {
+ if (token == NULL || strcasecmp(token, "a") == 0 || strcasecmp(token,
"all") == 0)
for (x = 0; x <= counter; x++) preserve[x] = 1;
+ if (token != NULL && strcasecmp(token, "none") == 0)
+ for (x = 0; x <= counter; x++) preserve[x] = 0;
number = 0;
- sscanf(token, "%d", &number);
+ if (token) sscanf(token, "%d", &number);
if (number > 0 && number <= counter) preserve[number] = 1;
token = strtok(NULL, " ,\n");
- }
+ } while (token != NULL);
for (sum = 0, x = 1; x <= counter; x++) sum += preserve[x];
} while (sum < 1); /* make sure we've preserved at least one file */
@@ -1043,12 +1088,13 @@
printf(" -H --hardlinks \tnormally, when two or more files point to the
same\n");
printf(" \tdisk area they are treated as non-duplicates;
this\n");
printf(" \toption will change this behavior\n");
- printf(" -n --noempty \texclude zero-length files from consideration\n");
- printf(" -A --nohidden \texclude hidden files from consideration\n");
+ printf(" -e --empty \tinclude zero-length files (excluded by default
as of 1.60)\n");
+ printf(" -a[n] --atleast[=n]\texclude files smaller n MiB (default:
1.0)\n");
+ printf(" -A --nohidden \texclude hidden files\n");
printf(" -f --omitfirst \tomit the first file in each set of matches\n");
printf(" -1 --sameline \tlist each set of matches on a single line\n");
- printf(" -S --size \tshow size of duplicate files\n");
- printf(" -m --summarize \tsummarize dupe information\n");
+ printf(" -S --showcrc \tshow check of each duplicate file (rip,
paranoiac ;)\n");
+ printf(" -m --summarizeonly\tonly summarize found dupes\n");
printf(" -q --quiet \thide progress indicator\n");
printf(" -d --delete \tprompt user for files to preserve and delete
all\n");
printf(" \tothers; important: under particular
circumstances,\n");
@@ -1056,14 +1102,12 @@
printf(" \twith -s or --symlinks, or when specifying a\n");
printf(" \tparticular directory more than once; refer to
the\n");
printf(" \tfdupes documentation for additional
information\n");
- /* printf(" -r --dlink \t(description)\n"); */
printf(" -L --linkhard \thardlink duplicate files to the first file
in\n");
printf(" \teach set of duplicates without prompting the
user\n");
- printf(" -N --noprompt \ttogether with --delete, preserve the first file
in\n");
+ printf(" -N --noprompt \ttogether with --delete, preserve the oldest copy
in\n");
printf(" \teach set of duplicates and delete the rest
without\n");
- printf(" \twithout prompting the user\n");
+ printf(" \tprior confirmation (DANGEROUS!)\n");
printf(" -D --debug \tenable debugging information\n");
- printf(" \teach set of duplicates without prompting the
user\n");
printf(" -v --version \tdisplay fdupes version\n");
printf(" -h --help \tdisplay this help message\n\n");
#ifdef OMIT_GETOPT_LONG
@@ -1073,6 +1117,7 @@
int main(int argc, char **argv) {
int x;
+ int i;
int opt;
FILE *file1;
FILE *file2;
@@ -1080,9 +1125,13 @@
file_t *curfile;
file_t **match = NULL;
filetree_t *checktree = NULL;
+ struct winsize ws;
+ int width = 0;
int filecount = 0;
int progress = 0;
+ int bar = 0;
char **oldargv;
+ char progressbuf[256];
int firstrecurse;
#ifndef OMIT_GETOPT_LONG
@@ -1095,18 +1144,21 @@
{ "recursive:", 0, 0, 'R' },
{ "quiet", 0, 0, 'q' },
{ "sameline", 0, 0, '1' },
- { "size", 0, 0, 'S' },
+ { "showcrc", 0, 0, 'S' },
+ { "size", 0, 0, 'S' }, // deprecated
{ "symlinks", 0, 0, 's' },
{ "hardlinks", 0, 0, 'H' },
{ "relink", 0, 0, 'l' },
{ "linkhard", 0, 0, 'L' },
- { "noempty", 0, 0, 'n' },
+ { "empty", 0, 0, 'e' },
+ { "atleast", 2, 0, 'a' },
{ "nohidden", 0, 0, 'A' },
{ "delete", 0, 0, 'd' },
{ "version", 0, 0, 'v' },
{ "help", 0, 0, 'h' },
{ "noprompt", 0, 0, 'N' },
{ "debug", 0, 0, 'D' },
+ { "summarizeonly", 0, 0, 'm'},
{ "summarize", 0, 0, 'm'},
{ "summary", 0, 0, 'm' },
{ 0, 0, 0, 0 }
@@ -1120,7 +1172,7 @@
oldargv = cloneargs(argc, argv);
- while ((opt = GETOPT(argc, argv, "frRq1Ss::HlLnAdDvhNm"
+ while ((opt = GETOPT(argc, argv, "frRq1Ss::HlLea::AdDvhNm"
#ifndef OMIT_GETOPT_LONG
, long_options, NULL
#endif
@@ -1142,7 +1194,7 @@
SETFLAG(flags, F_DSAMELINE);
break;
case 'S':
- SETFLAG(flags, F_SHOWSIZE);
+ SETFLAG(flags, F_SHOWCRC);
break;
case 's':
SETFLAG(flags, F_FOLLOWLINKS);
@@ -1150,8 +1202,8 @@
case 'H':
SETFLAG(flags, F_CONSIDERHARDLINKS);
break;
- case 'n':
- SETFLAG(flags, F_EXCLUDEEMPTY);
+ case 'e':
+ SETFLAG(flags, F_INCLUDEEMPTY);
break;
case 'A':
SETFLAG(flags, F_EXCLUDEHIDDEN);
@@ -1168,6 +1220,13 @@
case 'v':
printf("fdupes %s\n", VERSION);
exit(0);
+ case 'a':
+ SETFLAG(flags, F_EXCLUDESMALL);
+ if (optarg)
+ if (sscanf(optarg, "%f", &minsize))
+ break;
+ else
+ printf("'%s' is not a floating point number!\n", optarg);
case 'h':
help_text();
exit(1);
@@ -1175,15 +1234,18 @@
SETFLAG(flags, F_NOPROMPT);
break;
case 'm':
- SETFLAG(flags, F_SUMMARIZEMATCHES);
+ SETFLAG(flags, F_SUMMARIZEONLY);
break;
default:
- fprintf(stderr, "Try `fdupes --help' for more information.\n");
+ fprintf(stderr, "Didn't understand option '%c'.\nTry `fdupes --help' for
more information.\n", opt);
exit(1);
}
}
+ minsize *= 1048576;
+ printf("minsize=%f\n", minsize);
+
if (optind >= argc) {
errormsg("no directories specified\n");
exit(1);
@@ -1194,8 +1256,8 @@
exit(1);
}
- if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
- errormsg("options --summarize and --delete are not compatible\n");
+ if (ISFLAG(flags, F_SUMMARIZEONLY) && ISFLAG(flags, F_DELETEFILES)) {
+ errormsg("options --summarizeonly and --delete are not compatible\n");
exit(1);
}
@@ -1276,42 +1338,32 @@
curfile = curfile->next;
if (!ISFLAG(flags, F_HIDEPROGRESS)) {
- fprintf(stderr, "\rProgress [%d/%d] %d%% ", progress, filecount,
+ snprintf(progressbuf, sizeof(progressbuf), "Comparing Files.. (%d/%d)
%3d%% [", progress, filecount,
(int)((float) progress / (float) filecount * 100.0));
+ ioctl(STDERR_FILENO, TIOCGWINSZ, &ws);
+ width = ((ws.ws_col == 0) ? 80 : (ws.ws_col > 255) ? 255 : ws.ws_col) *
0.9; /* or - 1 */
+ bar = (int)((float) (width - strlen(progressbuf)) * (float) progress /
(float) filecount) + strlen(progressbuf);
+ for (i = strlen(progressbuf); i < width; i++) { progressbuf[i]= (bar >
i) ? '=' : ((bar == i) ? '>' : ' '); }
+ progressbuf[i] = '\0';
+ fprintf(stderr, "\r%s]", progressbuf);
progress++;
}
}
- if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
+ if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%255s\r", " ");
- if (ISFLAG(flags, F_DELETEFILES))
+ summarizematches(files);
+ if (!ISFLAG(flags, F_SUMMARIZEONLY))
{
- if (ISFLAG(flags, F_NOPROMPT))
- deletefiles(files, 0);
+ if (ISFLAG(flags, F_DELETEFILES))
+ deletefiles(files, (!ISFLAG(flags, F_NOPROMPT)));
+ else if (ISFLAG(flags, F_HARDLINKFILES))
+ hardlinkfiles(files, ISFLAG(flags, F_DEBUGINFO));
else
- deletefiles(files, 1);
+ printmatches(files);
}
- else
-
- if (ISFLAG(flags, F_HARDLINKFILES))
-
- if (ISFLAG(flags, F_DEBUGINFO))
- hardlinkfiles(files, 1);
- else
- hardlinkfiles(files, 0);
-
- else {
-
- if (ISFLAG(flags, F_SUMMARIZEMATCHES))
- summarizematches(files);
-
- else
-
- printmatches(files);
-
- }
-
+ /* clean up memory */
while (files) {
curfile = files->next;
free(files->d_name);
--- Makefile.inc/VERSION.orig 2002-05-29 04:44:29.000000000 +0200
+++ Makefile.inc/VERSION 2011-02-07 18:17:41.921682168 +0100
@@ -2,4 +2,4 @@
# VERSION determines the program's version number.
#
-VERSION = 1.50-PR2
+VERSION = 1.60-alpha
--- End Message ---