Hello community,

here is the log from the commit of package duperemove for openSUSE:Factory 
checked in at 2014-04-13 13:14:56
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/duperemove (Old)
 and      /work/SRC/openSUSE:Factory/.duperemove.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "duperemove"

Changes:
--------
--- /work/SRC/openSUSE:Factory/duperemove/duperemove.changes    2014-04-11 13:28:12.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.duperemove.new/duperemove.changes       2014-04-13 13:14:57.000000000 +0200
@@ -1,0 +2,9 @@
+Fri Apr 11 17:38:32 UTC 2014 - [email protected]
+
+- update to duperemove v0.07
+
+- fix usability of command line arguments
+
+- large documentation update
+
+-------------------------------------------------------------------

Old:
----
  duperemove-v0.06.tar.gz

New:
----
  duperemove-v0.07.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ duperemove.spec ++++++
--- /var/tmp/diff_new_pack.co1CMP/_old  2014-04-13 13:14:57.000000000 +0200
+++ /var/tmp/diff_new_pack.co1CMP/_new  2014-04-13 13:14:57.000000000 +0200
@@ -21,7 +21,7 @@
 Name:           duperemove
 BuildRequires:  gcc-c++
 BuildRequires:  libgcrypt-devel
-Version:        0.06
+Version:        0.07
 Release:        0
 Summary:        Software to find duplicate extents in files and remove them
 License:        GPL-2.0

++++++ duperemove-v0.06.tar.gz -> duperemove-v0.07.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.06/Makefile new/duperemove-v0.07/Makefile
--- old/duperemove-v0.06/Makefile       2014-04-11 02:11:50.000000000 +0200
+++ new/duperemove-v0.07/Makefile       2014-04-11 19:29:28.000000000 +0200
@@ -1,5 +1,5 @@
 CC=gcc
-RELEASE=v0.06
+RELEASE=v0.07
 CFLAGS=-Wall -ggdb -D_FILE_OFFSET_BITS=64 -DVERSTRING=\"$(RELEASE)\"
 
 MANPAGES=duperemove.8 btrfs-extent-same.8
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.06/README new/duperemove-v0.07/README
--- old/duperemove-v0.06/README 2014-04-11 02:11:50.000000000 +0200
+++ new/duperemove-v0.07/README 2014-04-11 19:29:28.000000000 +0200
@@ -4,7 +4,7 @@
 submitting them for deduplication. When given a list of files it will
 hash their contents on a block by block basis and compare those hashes
 to each other, finding and categorizing extents that match each
-other. When given the optional -D option, duperemove will submit those
+other. When given the -d option, duperemove will submit those
 extents for deduplication using the btrfs-extent-same ioctl.
 
 Duperemove has two major modes of operation one of which is a subset
@@ -13,10 +13,10 @@
 
 Readonly / Non-deduplicating Mode
 
-When run without -D (the default) duperemove will print out one or
+When run without -d (the default) duperemove will print out one or
 more tables of matching extents it has determined would be ideal
 candidates for deduplication. As a result, readonly mode is useful for
-seeing what duperemove might do when run with '-D'. The output could
+seeing what duperemove might do when run with '-d'. The output could
 also be used by some other software to submit the extents for
 deduplication at a later time.
 
@@ -35,11 +35,11 @@
 Deduping Mode
 
 This functions similarly to readonly mode with the exception that the
-duplicated extents found in our "read hash and compare" step will
+duplicated extents found in our "read, hash, and compare" step will
 actually be submitted for deduplication. At the end, a total count of
 bytes that were processed by the kernel will be printed.
 
-Keep in mind, that the bytecount we report here (recieved from the
+Keep in mind, that the bytecount we report here (received from the
 kernel) is NOT the total amount deduplicated but rather a count of the
 amount of data it also found to be identical.
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.06/btrfs-extent-same.c new/duperemove-v0.07/btrfs-extent-same.c
--- old/duperemove-v0.06/btrfs-extent-same.c    2014-04-11 02:11:50.000000000 +0200
+++ new/duperemove-v0.07/btrfs-extent-same.c    2014-04-11 19:29:28.000000000 +0200
@@ -11,6 +11,8 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
+ *
+ * Authors: Mark Fasheh <[email protected]>
  */
 
 #include <sys/types.h>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.06/dedupe.c new/duperemove-v0.07/dedupe.c
--- old/duperemove-v0.06/dedupe.c       2014-04-11 02:11:50.000000000 +0200
+++ new/duperemove-v0.07/dedupe.c       2014-04-11 19:29:28.000000000 +0200
@@ -11,6 +11,8 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
+ *
+ * Authors: Mark Fasheh <[email protected]>
  */
 
 #include <stdio.h>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.06/duperemove.8 new/duperemove-v0.07/duperemove.8
--- old/duperemove-v0.06/duperemove.8   2014-04-11 02:11:50.000000000 +0200
+++ new/duperemove-v0.07/duperemove.8   2014-04-11 19:29:28.000000000 +0200
@@ -5,19 +5,59 @@
 \fBduperemove\fR \fI[options]\fR \fIfiles...\fI
 .SH "DESCRIPTION"
 .PP
-\fBduperemove\fR scans a file or set of files and looks for duplicated
-extents. If those are found, they are printed out. \fBduperemove\fR
-can also remove duplicates if the underlying file system supports
-it. \fIfiles\fR can refer to a list of regular files. If a directory
-is specified, all regular files within it will be also scanned.
+\fBduperemove\fR is a simple tool for finding duplicated extents and
+submitting them for deduplication. When given a list of files it will
+hash their contents on a block by block basis and compare those hashes
+to each other, finding and categorizing extents that match each
+other. When given the \fB-d\fR option, \fBduperemove\fR will submit those
+extents for deduplication using the btrfs-extent-same ioctl.
+
+.SH "GENERAL"
+Duperemove has two major modes of operation one of which is a subset
+of the other.
+
+.SS "Readonly / Non-deduplicating Mode"
+
+When run without \fB-d\fR (the default) duperemove will print out one or
+more tables of matching extents it has determined would be ideal
+candidates for deduplication. As a result, readonly mode is useful for
+seeing what duperemove might do when run with \fB-d\fR. The output could
+also be used by some other software to submit the extents for
+deduplication at a later time.
+
+It is important to note that this mode will not print out \fBall\fR
+instances of matching extents, just those it would consider for
+deduplication.
+
+Another important note is that duperemove does not concern itself with
+the underlying representation of the extents. Some of them could be
+compressed, undergoing I/O, or even have already been deduplicated. In
+dedupe mode, the kernel handles those details and therefore we try not
+to replicate that work. Think of duperemove as trying for 'bulk'
+deduplication.
+
+.SS "Deduping Mode"
+
+This functions similarly to readonly mode with the exception that the
+duplicated extents found in our "read, hash, and compare" step will
+actually be submitted for deduplication. At the end, a total count of
+bytes that were processed by the kernel will be printed.
+
+Keep in mind, that the bytecount we report here (received from the
+kernel) is \fBnot\fR the total amount deduplicated but rather a count of the
+amount of data it also found to be identical.
 
 .SH "OPTIONS"
+\fIfiles\fR can refer to a list of regular files and directories. If a
+directory is specified, all regular files within it will be also
+scanned.
+
 .TP
 \fB\-r\fR
 Enable recursive dir traversal.
 
 .TP
-\fB\-D\fR
+\fB\-d\fR
 De-dupe the results - only works on \fIbtrfs\fR.
 .TP
 
@@ -26,9 +66,8 @@
 users on readonly snapshots.
 
 .TP
-\fB\-b size-in-kilobytes\fR
-Use the specified block size. Specified in Kilobytes - the default is
-\fB128\fR.
+\fB\-b size\fR
+Use the specified block size. The default is \fB128K\fR.
 
 .TP
 \fB\-v\fR
@@ -38,6 +77,30 @@
 \fB\-h\fR
 Prints help text.
 
+.SH "FAQ"
+
+.B "Is there an upper limit to the amount of data duperemove can process?"
+
+Right now duperemove has been tested on small numbers of VMS or iso
+files (5-10). I don't believe there should be a major problem scaling
+that up to 50 or so.
+
+.B "Why does it not print out all duplicate extents?"
+
+Internally duperemove is classifying extents based on various criteria
+like length, number of identical extents, etc. The printout we give is
+based on the results of that classification.
+
+.B "How can I find out my space savings after a dedupe?"
+
+The easiest way to do this would be a df before the dedupe operation,
+then a df about 60 seconds after the operation. It is common for btrfs
+space reporting to be 'behind' while delayed updates get processed, so
+an immediate df after deduping might not show any savings.
+
+.SH "NOTES"
+Deduplication is currently only supported by the \fIbtrfs\fR filesystem.
+
 .SH "SEE ALSO"
 .BR filesystems(5)
 .BR btrfs(8)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.06/duperemove.c new/duperemove-v0.07/duperemove.c
--- old/duperemove-v0.06/duperemove.c   2014-04-11 02:11:50.000000000 +0200
+++ new/duperemove-v0.07/duperemove.c   2014-04-11 19:29:28.000000000 +0200
@@ -11,10 +11,13 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
+ *
+ * Authors: Mark Fasheh <[email protected]>
  */
 
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <limits.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <stdio.h>
@@ -26,6 +29,7 @@
 #include <string.h>
 #include <linux/limits.h>
 #include <ctype.h>
+#include <getopt.h>
 
 #include "rbtree.h"
 #include "list.h"
@@ -385,19 +389,19 @@
 {
        printf("duperemove %s\n", VERSTRING);
        printf("Find duplicate extents and print them to stdout\n\n");
-       printf("Usage: %s [-r] [-D] [-A] [-b blocksize] [-v] [-d]"
+       printf("Usage: %s [-r] [-d] [-A] [-b blocksize] [-v] [--debug]"
               " OBJECTS\n", prog);
        printf("Where \"OBJECTS\" is a list of files (or directories) which\n");
        printf("we want to find duplicate extents in. If a directory is \n");
        printf("specified, all regular files inside of it will be scanned.\n");
        printf("\n\t<switches>\n");
        printf("\t-r\t\tEnable recursive dir traversal.\n");
-       printf("\t-D\t\tDe-dupe the results - only works on btrfs.\n");
+       printf("\t-d\t\tDe-dupe the results - only works on btrfs.\n");
        printf("\t-A\t\tOpens files readonly when deduping. Primarily for use by privileged users on readonly snapshots\n");
        printf("\t-b bsize\tUse bsize blocks. Default is %dk.\n",
               DEFAULT_BLOCKSIZE / 1024);
        printf("\t-v\t\tBe verbose.\n");
-       printf("\t-d\t\tPrint debug messages, forces -v if selected.\n");
+       printf("\t--debug\t\tPrint debug messages, forces -v if selected.\n");
        printf("\t-h\t\tPrints this help text.\n");
 }
 
@@ -547,17 +551,26 @@
        return strtoull(s, NULL, 10) * mult;
 }
 
+enum {
+       DEBUG_OPTION = CHAR_MAX + 1,
+};
+
 /*
  * Ok this is doing more than just parsing options.
  */
 static int parse_options(int argc, char **argv)
 {
        int i, c, numfiles;
+       static struct option long_ops[] = {
+               { "debug", 0, 0, DEBUG_OPTION },
+               { 0, 0, 0, 0}
+       };
 
        if (argc < 2)
                return 1;
 
-       while ((c = getopt(argc, argv, "Ab:vdDrh?")) != -1) {
+       while ((c = getopt_long(argc, argv, "Ab:vdDrh?", long_ops, NULL))
+              != -1) {
                switch (c) {
                case 'A':
                        target_rw = 0;
@@ -568,13 +581,14 @@
                            blocksize > MAX_BLOCKSIZE)
                                return EINVAL;
                        break;
+               case 'd':
                case 'D':
                        run_dedupe = 1;
                        break;
                case 'r':
                        recurse_dirs = 1;
                        break;
-               case 'd':
+               case DEBUG_OPTION:
                        debug = 1;
                        /* Fall through */
                case 'v':
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.06/hash-tree.c new/duperemove-v0.07/hash-tree.c
--- old/duperemove-v0.06/hash-tree.c    2014-04-11 02:11:50.000000000 +0200
+++ new/duperemove-v0.07/hash-tree.c    2014-04-11 19:29:28.000000000 +0200
@@ -11,6 +11,8 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
+ *
+ * Authors: Mark Fasheh <[email protected]>
  */
 
 #include <stdlib.h>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.06/results-tree.c new/duperemove-v0.07/results-tree.c
--- old/duperemove-v0.06/results-tree.c 2014-04-11 02:11:50.000000000 +0200
+++ new/duperemove-v0.07/results-tree.c 2014-04-11 19:29:28.000000000 +0200
@@ -11,6 +11,8 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
+ *
+ * Authors: Mark Fasheh <[email protected]>
  */
 
 #include <stdlib.h>

-- 
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to