Package: fdupes
Version: 1.50-PR2-3
Source: fdupes
Tags: patch
Salut!
I've been hacking away at fdupes for the last few days to scratch my own itches; the extra functionality I originally wanted is now mostly in and working. When I find the time, I will continue cleaning up the current code, perhaps implement one or two other wishlist items, and also clean up the documentation. For now, this patch is what emerged, and I thought it better to forward it ASAP to prevent bitrot and also to get feedback or testing. It is to be applied after fdupes_1.50-PR2-3.diff. The changes are as follows:

- prettified and unified printing of file sizes
- printing of file modification date
- optional printing of checksum
- empty files now excluded by default
- always print summary at top
- customizable minimum size (in MiB); files smaller than the limit are excluded
  (I want to see ZIPs, PDFs, ISOs, not HTML, EXE, BAK => --atleast 0.6)
- instead of typing 'all', typing 'a' or just hitting enter
  will also preserve all files, i.e. skip on to the next set
- a PROGRESSBAR!!!11!1! so fun watching it crawl starboard
- misc code beautification

what remains to be done:
- updating/reworking of options and documentation
- result sorting
- ncurses interface?

Well, at least in my testing, no bugs were introduced. And yes, it's laughable - but the --showcrc option makes me much more comfortable killing files ^^
So here it is, have a look and please report back any problems.
regards,
marcel (happy gento0er AND debianaut btw ;D)
--- fdupes.c.orig       2011-02-04 17:28:19.599253312 +0100
+++ fdupes.c    2011-02-07 15:43:38.643146444 +0100
@@ -23,6 +23,7 @@
 #include <stdarg.h>
 #include <string.h>
 #include <sys/stat.h>
+#include <sys/ioctl.h>
 #include <dirent.h>
 #include <unistd.h>
 #include <stdlib.h>
@@ -30,6 +31,7 @@
 #include <getopt.h>
 #endif
 #include <string.h>
+#include <time.h>
 #include <errno.h>
 #include <libgen.h>
 
@@ -45,21 +47,24 @@
 #define F_DSAMELINE         0x0004
 #define F_FOLLOWLINKS       0x0008
 #define F_DELETEFILES       0x0010
-#define F_EXCLUDEEMPTY      0x0020
-#define F_CONSIDERHARDLINKS 0x0040
-#define F_SHOWSIZE          0x0080
-#define F_OMITFIRST         0x0100
-#define F_RECURSEAFTER      0x0200
-#define F_NOPROMPT          0x0400
-#define F_SUMMARIZEMATCHES  0x0800
-#define F_EXCLUDEHIDDEN     0x1000
-#define F_HARDLINKFILES     0x2000
-#define F_DEBUGINFO         0x4000
+#define F_INCLUDEEMPTY      0x0020
+#define F_EXCLUDESMALL      0x0040
+#define F_EXCLUDEHIDDEN     0x0080
+#define F_CONSIDERHARDLINKS 0x0100
+#define F_SHOWCRC           0x0200
+#define F_OMITFIRST         0x0400
+#define F_RECURSEAFTER      0x0800
+#define F_NOPROMPT          0x1000
+#define F_SUMMARIZEONLY     0x2000
+#define F_HARDLINKFILES     0x4000
+#define F_DEBUGINFO         0x8000
 
 char *program_name;
 
 unsigned long flags = 0;
 
+float minsize = 1;
+
 #define CHUNK_SIZE 8192
 
 #define INPUT_SIZE 256
@@ -145,6 +150,30 @@
   }
 }
 
+char *formatbytesize(off_t size)
+{
+  static char sizestr[64]; /* returned to the caller; overwritten by every call, so copy it if it must survive */
+  /* Render SIZE with a binary-prefix unit. off_t has no printf length modifier (%zu is for size_t), hence the casts. */
+  if (size < 2) snprintf(sizestr, sizeof(sizestr), "%lld Byte", (long long) size);
+  else if (size < 1024) snprintf(sizestr, sizeof(sizestr), "%lld Bytes", (long long) size);
+  else if (size < 1048576) snprintf(sizestr, sizeof(sizestr), "%.2f Kibibytes", (double) size / 1024);
+  else if (size < 1073741824) snprintf(sizestr, sizeof(sizestr), "%.2f Mebibytes", (double) size / 1048576);
+  else snprintf(sizestr, sizeof(sizestr), "%.2f Gibibytes", (double) size / 1073741824);
+  return sizestr;
+}
+
+void printfiledetails(file_t *file)
+{
+  /* Print "<name> (<local mtime>)", append the stored signature when F_SHOWCRC is set, then end the line. */
+  char stamp[42] = "unknown date"; /* kept as-is when localtime() cannot convert the mtime */
+  struct tm *when = localtime(&file->mtime);
+
+  if (when) strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", when); /* localtime() may return NULL */
+  printf("%s (%s)", file->d_name, stamp);
+  if (ISFLAG(flags, F_SHOWCRC)) printf(" CRC:%s", file->crcsignature);
+  printf("\n");
+}
+
 off_t filesize(char *filename) {
   struct stat s;
 
@@ -237,7 +266,7 @@
 {
   DIR *cd;
   file_t *newfile;
-  struct dirent *dirinfo;
+  struct dirent *direntry;
   int lastchar;
   int filecount = 0;
   struct stat info;
@@ -253,8 +282,8 @@
     return 0;
   }
 
-  while ((dirinfo = readdir(cd)) != NULL) {
-    if (strcmp(dirinfo->d_name, ".") && strcmp(dirinfo->d_name, "..")) {
+  while ((direntry = readdir(cd)) != NULL) {
+    if (strcmp(direntry->d_name, ".") && strcmp(direntry->d_name, "..")) {
       if (!ISFLAG(flags, F_HIDEPROGRESS)) {
        fprintf(stderr, "\rBuilding file list %c ", indicator[progress]);
        progress = (progress + 1) % 4;
@@ -275,7 +304,7 @@
       newfile->duplicates = NULL;
       newfile->hasdupes = 0;
 
-      newfile->d_name = (char*)malloc(strlen(dir)+strlen(dirinfo->d_name)+2);
+      newfile->d_name = (char*)malloc(strlen(dir)+strlen(direntry->d_name)+2);
 
       if (!newfile->d_name) {
        errormsg("out of memory!\n");
@@ -288,7 +317,7 @@
       lastchar = strlen(dir) - 1;
       if (lastchar >= 0 && dir[lastchar] != '/')
        strcat(newfile->d_name, "/");
-      strcat(newfile->d_name, dirinfo->d_name);
+      strcat(newfile->d_name, direntry->d_name);
       
       if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
        fullname = strdup(newfile->d_name);
@@ -301,7 +330,9 @@
        free(fullname);
       }
 
-      if (filesize(newfile->d_name) == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) {
+      if ((filesize(newfile->d_name) == 0 && !ISFLAG(flags, F_INCLUDEEMPTY)) ||
+          (direntry->d_type == DT_REG && ISFLAG(flags, F_EXCLUDESMALL) && 
filesize(newfile->d_name) < (int) minsize)) {
+//         printf("excluding %s size %s\n", newfile->d_name, 
formatbytesize(filesize(newfile->d_name)));
        free(newfile->d_name);
        free(newfile);
        continue;
@@ -314,6 +345,7 @@
       }
 
       if (lstat(newfile->d_name, &linfo) == -1) {
+        printf("lstat failed on %s size %s\n", newfile->d_name, 
formatbytesize(filesize(newfile->d_name)));
        free(newfile->d_name);
        free(newfile);
        continue;
@@ -646,15 +678,8 @@
   if (numsets == 0)
     printf("No duplicates found.\n\n");
   else
-  {
-    if (numbytes < 1024.0)
-      printf("%d duplicate files (in %d sets), occupying %.0f bytes.\n\n", 
numfiles, numsets, numbytes);
-    else if (numbytes <= (1000.0 * 1000.0))
-      printf("%d duplicate files (in %d sets), occupying %.1f kylobytes\n\n", 
numfiles, numsets, numbytes / 1000.0);
-    else
-      printf("%d duplicate files (in %d sets), occupying %.1f megabytes\n\n", 
numfiles, numsets, numbytes / (1000.0 * 1000.0));
- 
-  }
+    printf("%d duplicate files (in %d sets), occupying %s.\n\n",
+          numfiles, numsets, formatbytesize(numbytes));
 }
 
 void printmatches(file_t *files)
@@ -664,21 +689,29 @@
   while (files != NULL) {
     if (files->hasdupes) {
       if (!ISFLAG(flags, F_OMITFIRST)) {
-       if (ISFLAG(flags, F_SHOWSIZE)) printf("%lld byte%seach:\n", files->size,
-        (files->size != 1) ? "s " : " ");
-       if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &files->d_name);
-       printf("%s%c", files->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
+       printf("%s each:\n", formatbytesize(files->size));
+       if (ISFLAG(flags, F_DSAMELINE))
+       {
+         escapefilename("\\ ", &files->d_name);
+         printf("%s ", files->d_name);
+       }
+       else
+         printfiledetails(files);
       }
       tmpfile = files->duplicates;
       while (tmpfile != NULL) {
-       if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &tmpfile->d_name);
-       printf("%s%c", tmpfile->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
+       if (ISFLAG(flags, F_DSAMELINE)) 
+       {
+         escapefilename("\\ ", &tmpfile->d_name);
+         printf("%s ", tmpfile->d_name);
+       }
+       else
+         printfiledetails(tmpfile);
        tmpfile = tmpfile->duplicates;
       }
       printf("\n");
-
     }
-      
+    
     files = files->next;
   }
 }
@@ -788,25 +821,38 @@
     exit(1);
   }
 
+  printf("Hint:\n");
+  printf("The files to keep for each match set can be specified\n");
+  printf("individually by listing the corresponding numbers\n");
+  printf("delimited by spaces and commata (as in [1 2 3] or [1,4])\n");
+  printf(" - or enter [all]/[a]/[RETURN] to not delete any files.\n");
+  printf("The sets are sorted as last modified, oldest copy on top.\n\n");
   while (files) {
     if (files->hasdupes) {
       curgroup++;
       counter = 1;
       dupelist[counter] = files;
 
-      if (prompt) printf("[%d] %s\n", counter, files->d_name);
+      if (prompt) {
+        printf("[%d] ", counter);
+       printfiledetails(files);
+      }
 
       tmpfile = files->duplicates;
 
       while (tmpfile) {
        dupelist[++counter] = tmpfile;
-       if (prompt) printf("[%d] %s\n", counter, tmpfile->d_name);
+        
+       if (prompt) {
+          printf("[%d] ", counter);
+          printfiledetails(tmpfile);
+        }
        tmpfile = tmpfile->duplicates;
       }
 
       if (prompt) printf("\n");
 
-      if (!prompt) /* preserve only the first file */
+      if (!prompt) /* preserve first file & delete clones */
       {
          preserve[1] = 1;
         for (x = 2; x <= counter; x++) preserve[x] = 0;
@@ -815,11 +861,8 @@
       else /* prompt for files to preserve */
 
       do {
-       printf("Set %d of %d, preserve files [1 - %d, all]", 
-          curgroup, groups, counter);
-       if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%lld byte%seach)", files->size,
-         (files->size != 1) ? "s " : " ");
-       printf(": ");
+       printf("Set %d of %d (%s each), files to keep [1 - %d, all]: ",
+          curgroup, groups, formatbytesize(files->size), counter);
        fflush(stdout);
 
        fgets(preservestr, INPUT_SIZE, stdin);
@@ -843,17 +886,19 @@
        for (x = 1; x <= counter; x++) preserve[x] = 0;
        
        token = strtok(preservestr, " ,\n");
-       
-       while (token != NULL) {
-         if (strcasecmp(token, "all") == 0)
+
+       do {
+         if (token == NULL || strcasecmp(token, "a") == 0 || strcasecmp(token, 
"all") == 0)
            for (x = 0; x <= counter; x++) preserve[x] = 1;
+         if (token != NULL && strcasecmp(token, "none") == 0)
+           for (x = 0; x <= counter; x++) preserve[x] = 0;
          
          number = 0;
-         sscanf(token, "%d", &number);
+         if (token) sscanf(token, "%d", &number);
          if (number > 0 && number <= counter) preserve[number] = 1;
          
          token = strtok(NULL, " ,\n");
-       }
+       } while (token != NULL);
       
        for (sum = 0, x = 1; x <= counter; x++) sum += preserve[x];
       } while (sum < 1); /* make sure we've preserved at least one file */
@@ -1043,12 +1088,13 @@
   printf(" -H --hardlinks   \tnormally, when two or more files point to the 
same\n");
   printf("                  \tdisk area they are treated as non-duplicates; 
this\n"); 
   printf("                  \toption will change this behavior\n");
-  printf(" -n --noempty     \texclude zero-length files from consideration\n");
-  printf(" -A --nohidden    \texclude hidden files from consideration\n");
+  printf(" -e --empty       \tinclude zero-length files (excluded by default 
as of 1.60)\n");
+  printf(" -a[n] --atleast[=n]\texclude files smaller n MiB (default: 
1.0)\n"); 
+  printf(" -A --nohidden    \texclude hidden files\n");
   printf(" -f --omitfirst   \tomit the first file in each set of matches\n");
   printf(" -1 --sameline    \tlist each set of matches on a single line\n");
-  printf(" -S --size        \tshow size of duplicate files\n");
-  printf(" -m --summarize   \tsummarize dupe information\n");
+  printf(" -S --showcrc     \tshow check of each duplicate file (rip, 
paranoiac ;)\n");
+  printf(" -m --summarizeonly\tonly summarize found dupes\n");
   printf(" -q --quiet       \thide progress indicator\n");
   printf(" -d --delete      \tprompt user for files to preserve and delete 
all\n"); 
   printf("                  \tothers; important: under particular 
circumstances,\n");
@@ -1056,14 +1102,12 @@
   printf("                  \twith -s or --symlinks, or when specifying a\n");
   printf("                  \tparticular directory more than once; refer to 
the\n");
   printf("                  \tfdupes documentation for additional 
information\n");
-  /* printf(" -r --dlink     \t(description)\n"); */
   printf(" -L --linkhard    \thardlink duplicate files to the first file 
in\n");
   printf("                  \teach set of duplicates without prompting the 
user\n");
-  printf(" -N --noprompt    \ttogether with --delete, preserve the first file 
in\n");
+  printf(" -N --noprompt    \ttogether with --delete, preserve the oldest copy 
in\n");
   printf("                  \teach set of duplicates and delete the rest 
without\n");
-  printf("                  \twithout prompting the user\n");
+  printf("                  \tprior confirmation (DANGEROUS!)\n");
   printf(" -D --debug       \tenable debugging information\n");
-  printf("                  \teach set of duplicates without prompting the 
user\n");
   printf(" -v --version     \tdisplay fdupes version\n");
   printf(" -h --help        \tdisplay this help message\n\n");
 #ifdef OMIT_GETOPT_LONG
@@ -1073,6 +1117,7 @@
 
 int main(int argc, char **argv) {
   int x;
+  int i;
   int opt;
   FILE *file1;
   FILE *file2;
@@ -1080,9 +1125,13 @@
   file_t *curfile;
   file_t **match = NULL;
   filetree_t *checktree = NULL;
+  struct winsize ws;
+  int width = 0;
   int filecount = 0;
   int progress = 0;
+  int bar = 0;
   char **oldargv;
+  char   progressbuf[256];
   int firstrecurse;
   
 #ifndef OMIT_GETOPT_LONG
@@ -1095,18 +1144,21 @@
     { "recursive:", 0, 0, 'R' },
     { "quiet", 0, 0, 'q' },
     { "sameline", 0, 0, '1' },
-    { "size", 0, 0, 'S' },
+    { "showcrc", 0, 0, 'S' },
+    { "size", 0, 0, 'S' }, // deprecated
     { "symlinks", 0, 0, 's' },
     { "hardlinks", 0, 0, 'H' },
     { "relink", 0, 0, 'l' },
     { "linkhard", 0, 0, 'L' },
-    { "noempty", 0, 0, 'n' },
+    { "empty", 0, 0, 'e' },
+    { "atleast", 2, 0, 'a' },
     { "nohidden", 0, 0, 'A' },
     { "delete", 0, 0, 'd' },
     { "version", 0, 0, 'v' },
     { "help", 0, 0, 'h' },
     { "noprompt", 0, 0, 'N' },
     { "debug", 0, 0, 'D' },
+    { "summarizeonly", 0, 0, 'm'},
     { "summarize", 0, 0, 'm'},
     { "summary", 0, 0, 'm' },
     { 0, 0, 0, 0 }
@@ -1120,7 +1172,7 @@
 
   oldargv = cloneargs(argc, argv);
 
-  while ((opt = GETOPT(argc, argv, "frRq1Ss::HlLnAdDvhNm"
+  while ((opt = GETOPT(argc, argv, "frRq1Ss::HlLea::AdDvhNm"
 #ifndef OMIT_GETOPT_LONG
           , long_options, NULL
 #endif
@@ -1142,7 +1194,7 @@
       SETFLAG(flags, F_DSAMELINE);
       break;
     case 'S':
-      SETFLAG(flags, F_SHOWSIZE);
+      SETFLAG(flags, F_SHOWCRC);
       break;
     case 's':
       SETFLAG(flags, F_FOLLOWLINKS);
@@ -1150,8 +1202,8 @@
     case 'H':
       SETFLAG(flags, F_CONSIDERHARDLINKS);
       break;
-    case 'n':
-      SETFLAG(flags, F_EXCLUDEEMPTY);
+    case 'e':
+      SETFLAG(flags, F_INCLUDEEMPTY);
       break;
     case 'A':
       SETFLAG(flags, F_EXCLUDEHIDDEN);
@@ -1168,6 +1220,13 @@
     case 'v':
       printf("fdupes %s\n", VERSION);
       exit(0);
+    case 'a':
+      SETFLAG(flags, F_EXCLUDESMALL);
+      if (optarg)
+       if (sscanf(optarg, "%f", &minsize))
+         break;
+       else
+         printf("'%s' is not a floating point number!\n", optarg);
     case 'h':
       help_text();
       exit(1);
@@ -1175,15 +1234,18 @@
       SETFLAG(flags, F_NOPROMPT);
       break;
     case 'm':
-      SETFLAG(flags, F_SUMMARIZEMATCHES);
+      SETFLAG(flags, F_SUMMARIZEONLY);
       break;
 
     default:
-      fprintf(stderr, "Try `fdupes --help' for more information.\n");
+      fprintf(stderr, "Didn't understand option '%c'.\nTry `fdupes --help' for 
more information.\n", opt);
       exit(1);
     }
   }
 
+  minsize *= 1048576;
+  printf("minsize=%f\n", minsize);
+
   if (optind >= argc) {
     errormsg("no directories specified\n");
     exit(1);
@@ -1194,8 +1256,8 @@
     exit(1);
   }
 
-  if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
-    errormsg("options --summarize and --delete are not compatible\n");
+  if (ISFLAG(flags, F_SUMMARIZEONLY) && ISFLAG(flags, F_DELETEFILES)) {
+    errormsg("options --summarizeonly and --delete are not compatible\n");
     exit(1);
   }
 
@@ -1276,42 +1338,32 @@
     curfile = curfile->next;
 
     if (!ISFLAG(flags, F_HIDEPROGRESS)) {
-      fprintf(stderr, "\rProgress [%d/%d] %d%% ", progress, filecount,
+      snprintf(progressbuf, sizeof(progressbuf), "Comparing Files.. (%d/%d) 
%3d%% [", progress, filecount,
        (int)((float) progress / (float) filecount * 100.0));
+      ioctl(STDERR_FILENO, TIOCGWINSZ, &ws);
+      width = ((ws.ws_col == 0) ? 80 : (ws.ws_col > 255) ? 255 : ws.ws_col) * 
0.9; /* or - 1 */
+      bar = (int)((float) (width - strlen(progressbuf)) * (float) progress / 
(float) filecount) + strlen(progressbuf);
+      for (i = strlen(progressbuf); i < width; i++) { progressbuf[i]= (bar > 
i) ? '=' : ((bar == i) ? '>' : ' '); }
+      progressbuf[i] = '\0';
+      fprintf(stderr, "\r%s]", progressbuf);
       progress++;
     }
   }
 
-  if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
+  if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%255s\r", " ");
 
-  if (ISFLAG(flags, F_DELETEFILES))
+  summarizematches(files);
+  if (!ISFLAG(flags, F_SUMMARIZEONLY))
   {
-    if (ISFLAG(flags, F_NOPROMPT))
-      deletefiles(files, 0);
+    if (ISFLAG(flags, F_DELETEFILES))
+      deletefiles(files, (!ISFLAG(flags, F_NOPROMPT)));
+    else if (ISFLAG(flags, F_HARDLINKFILES))
+      hardlinkfiles(files, ISFLAG(flags, F_DEBUGINFO));
     else
-      deletefiles(files, 1);
+      printmatches(files);
   }
 
-  else 
-
-    if (ISFLAG(flags, F_HARDLINKFILES))
-
-        if (ISFLAG(flags, F_DEBUGINFO))
-            hardlinkfiles(files, 1);
-        else
-            hardlinkfiles(files, 0);
-
-    else {
-    
-        if (ISFLAG(flags, F_SUMMARIZEMATCHES))
-            summarizematches(files);
-
-        else
-
-            printmatches(files);
-
-    }
-
+  /* clean up memory */
   while (files) {
     curfile = files->next;
     free(files->d_name);
--- Makefile.inc/VERSION.orig   2002-05-29 04:44:29.000000000 +0200
+++ Makefile.inc/VERSION        2011-02-07 18:17:41.921682168 +0100
@@ -2,4 +2,4 @@
 # VERSION determines the program's version number.
 #
 
-VERSION = 1.50-PR2
+VERSION = 1.60-alpha

Reply via email to