The following changes since commit b5b571a3f01c17ddb39fd0306cb425a11e216f3d:

  Fix compile for FIO_INC_DEBUG not set (2014-09-24 09:54:24 -0600)

are available in the git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 1066358aebafb7221732bedd6fb9fde56b14be7b:

  Improve dedupe/compression buffer filling for mixed block sizes (2014-09-26 15:04:58 -0600)

----------------------------------------------------------------
Andrey Kuzmin (2):
      Add ability to use an include file in a fio job file
      Add HOWTO section on include files

Jens Axboe (10):
      Add bloom filter
      dedupe: default to using a bloom filter to save memory
      Add debug helper stub for t/ programs
      t/lfsr-test: fixup time
      dedupe: fix warning and segfault on -B0
      dedupe: read in larger chunks at the time
      bloom: always use a larger minimum size for bloom filter
      Clarify that include files may not contain job sections
      bloom: use independent hashes
      Improve dedupe/compression buffer filling for mixed block sizes

 HOWTO         |   35 +++++++++++++
 Makefile      |    9 ++--
 crc/xxhash.c  |    2 +-
 crc/xxhash.h  |    2 +-
 fio_time.h    |    1 +
 init.c        |  162 +++++++++++++++++++++++++++++++++++++++------------------
 io_u.c        |   32 +++++++-----
 lib/bloom.c   |  107 +++++++++++++++++++++++++++++++++++++
 lib/bloom.h   |   13 +++++
 t/debug.c     |   14 +++++
 t/debug.h     |    6 +++
 t/dedupe.c    |  150 ++++++++++++++++++++++++++++++++++++----------------
 t/lfsr-test.c |   13 ++---
 t/stest.c     |   12 +----
 14 files changed, 430 insertions(+), 128 deletions(-)
 create mode 100644 lib/bloom.c
 create mode 100644 lib/bloom.h
 create mode 100644 t/debug.c
 create mode 100644 t/debug.h

---

Diff of recent changes:

diff --git a/HOWTO b/HOWTO
index 23746ce..aaa46f8 100644
--- a/HOWTO
+++ b/HOWTO
@@ -159,6 +159,41 @@ specify:
 
 $ fio --name=random-writers --ioengine=libaio --iodepth=4 --rw=randwrite --bs=32k --direct=0 --size=64m --numjobs=4
 
+When fio is used as the basis of a reasonably large test suite, it might be
+desirable to share a set of standardized settings across multiple job files.
+Instead of copy/pasting such settings, any section may pull in an external
+.fio file with an 'include filename' directive, as in the following example:
+
+; -- start job file including.fio --
+[global]
+filename=/tmp/test
+filesize=1m
+include glob-include.fio
+
+[test]
+rw=randread
+bs=4k
+time_based=1
+runtime=10
+include test-include.fio
+; -- end job file including.fio --
+
+; -- start job file glob-include.fio --
+thread=1
+group_reporting=1
+; -- end job file glob-include.fio --
+
+; -- start job file test-include.fio --
+ioengine=libaio
+iodepth=4
+; -- end job file test-include.fio --
+
+Settings pulled into a section apply to that section only (except for the
+global section). Include directives may be nested: any included file may
+contain further include directives. Include files may not contain []
+sections.
+
+
 4.1 Environment variables
 -------------------------
 
diff --git a/Makefile b/Makefile
index fe439c1..8c424e3 100644
--- a/Makefile
+++ b/Makefile
@@ -36,7 +36,7 @@ SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \
                lib/lfsr.c gettime-thread.c helpers.c lib/flist_sort.c \
                lib/hweight.c lib/getrusage.c idletime.c td_error.c \
                profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
-               lib/tp.c
+               lib/tp.c lib/bloom.c
 
 ifdef CONFIG_LIBHDFS
   HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)
@@ -164,7 +164,7 @@ GFIO_OBJS = $(OBJS) gfio.o graph.o tickmarks.o ghelpers.o goptions.o gerror.o \
 -include $(OBJS:.o=.d)
 
 T_SMALLOC_OBJS = t/stest.o
-T_SMALLOC_OBJS += gettime.o mutex.o smalloc.o t/log.o
+T_SMALLOC_OBJS += gettime.o mutex.o smalloc.o t/log.o t/debug.o
 T_SMALLOC_PROGS = t/stest
 
 T_IEEE_OBJS = t/ieee754.o
@@ -180,7 +180,7 @@ T_AXMAP_OBJS += lib/lfsr.o lib/axmap.o
 T_AXMAP_PROGS = t/axmap
 
 T_LFSR_TEST_OBJS = t/lfsr-test.o
-T_LFSR_TEST_OBJS += lib/lfsr.o
+T_LFSR_TEST_OBJS += lib/lfsr.o gettime.o t/log.o t/debug.o
 T_LFSR_TEST_PROGS = t/lfsr-test
 
 ifeq ($(CONFIG_TARGET_OS), Linux)
@@ -192,7 +192,8 @@ endif
 ifeq ($(CONFIG_TARGET_OS), Linux)
 T_DEDUPE_OBJS = t/dedupe.o
 T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \
-               memalign.o
+               memalign.o lib/bloom.o t/debug.o crc/xxhash.o crc/crc32c.o \
+               crc/crc32c-intel.o
 T_DEDUPE_PROGS = t/dedupe
 endif
 
diff --git a/crc/xxhash.c b/crc/xxhash.c
index eedaecb..4736c52 100644
--- a/crc/xxhash.c
+++ b/crc/xxhash.c
@@ -221,7 +221,7 @@ static uint32_t XXH32_endian_align(const void* input, int len, uint32_t seed, XX
 }
 
 
-uint32_t XXH32(const void* input, int len, uint32_t seed)
+uint32_t XXH32(const void* input, uint32_t len, uint32_t seed)
 {
 #if 0
     // Simple version, good for code maintenance, but unfortunately slow for small inputs
diff --git a/crc/xxhash.h b/crc/xxhash.h
index e80a91d..8850d20 100644
--- a/crc/xxhash.h
+++ b/crc/xxhash.h
@@ -88,7 +88,7 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 // Simple Hash Functions
 //****************************
 
-unsigned int XXH32 (const void* input, int len, unsigned int seed);
+uint32_t XXH32 (const void* input, uint32_t len, uint32_t seed);
 
 /*
 XXH32() :
diff --git a/fio_time.h b/fio_time.h
index c550a55..9f7d209 100644
--- a/fio_time.h
+++ b/fio_time.h
@@ -1,6 +1,7 @@
 #ifndef FIO_TIME_H
 #define FIO_TIME_H
 
+struct thread_data;
 extern uint64_t utime_since(struct timeval *, struct timeval *);
 extern uint64_t utime_since_now(struct timeval *);
 extern uint64_t mtime_since(struct timeval *, struct timeval *);
diff --git a/init.c b/init.c
index 5b0290d..e208451 100644
--- a/init.c
+++ b/init.c
@@ -1397,11 +1397,12 @@ static int is_empty_or_comment(char *line)
 /*
  * This is our [ini] type file parser.
  */
-int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
+int __parse_jobs_ini(struct thread_data *td,
+               char *file, int is_buf, int stonewall_flag, int type,
+               int nested, char *name, char ***popts, int *aopts, int *nopts)
 {
-       unsigned int global;
-       struct thread_data *td;
-       char *string, *name;
+       unsigned int global = 0;
+       char *string;
        FILE *f;
        char *p;
        int ret = 0, stonewall;
@@ -1411,6 +1412,9 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
        char **opts;
        int i, alloc_opts, num_opts;
 
+       dprint(FD_PARSE, "Parsing ini file %s\n", file);
+       assert(td || !nested);
+
        if (is_buf)
                f = NULL;
        else {
@@ -1430,12 +1434,23 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
        /*
         * it's really 256 + small bit, 280 should suffice
         */
-       name = malloc(280);
-       memset(name, 0, 280);
+       if (!nested) {
+               name = malloc(280);
+               memset(name, 0, 280);
+       }
 
-       alloc_opts = 8;
-       opts = malloc(sizeof(char *) * alloc_opts);
-       num_opts = 0;
+       opts = NULL;
+       if (nested && popts) {
+               opts = *popts;
+               alloc_opts = *aopts;
+               num_opts = *nopts;
+       }
+
+       if (!opts) {
+               alloc_opts = 8;
+               opts = malloc(sizeof(char *) * alloc_opts);
+               num_opts = 0;
+       }
 
        stonewall = stonewall_flag;
        do {
@@ -1456,58 +1471,72 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
                strip_blank_front(&p);
                strip_blank_end(p);
 
+               dprint(FD_PARSE, "%s\n", p);
                if (is_empty_or_comment(p))
                        continue;
-               if (sscanf(p, "[%255[^\n]]", name) != 1) {
-                       if (inside_skip)
+
+               if (!nested) {
+                       if (sscanf(p, "[%255[^\n]]", name) != 1) {
+                               if (inside_skip)
+                                       continue;
+
+                               log_err("fio: option <%s> outside of "
+                                       "[] job section\n", p);
+                               break;
+                       }
+
+                       name[strlen(name) - 1] = '\0';
+
+                       if (skip_this_section(name)) {
+                               inside_skip = 1;
                                continue;
-                       log_err("fio: option <%s> outside of [] job section\n",
-                                                                       p);
-                       break;
-               }
+                       } else
+                               inside_skip = 0;
 
-               name[strlen(name) - 1] = '\0';
+                       dprint(FD_PARSE, "Parsing section [%s]\n", name);
 
-               if (skip_this_section(name)) {
-                       inside_skip = 1;
-                       continue;
-               } else
-                       inside_skip = 0;
+                       global = !strncmp(name, "global", 6);
 
-               global = !strncmp(name, "global", 6);
+                       if (dump_cmdline) {
+                               if (first_sect)
+                                       log_info("fio ");
+                               if (!global)
+                                       log_info("--name=%s ", name);
+                               first_sect = 0;
+                       }
 
-               if (dump_cmdline) {
-                       if (first_sect)
-                               log_info("fio ");
-                       if (!global)
-                               log_info("--name=%s ", name);
-                       first_sect = 0;
-               }
+                       td = get_new_job(global, &def_thread, 0, name);
+                       if (!td) {
+                               ret = 1;
+                               break;
+                       }
 
-               td = get_new_job(global, &def_thread, 0, name);
-               if (!td) {
-                       ret = 1;
-                       break;
-               }
+                       /*
+                        * Separate multiple job files by a stonewall
+                        */
+                       if (!global && stonewall) {
+                               td->o.stonewall = stonewall;
+                               stonewall = 0;
+                       }
 
-               /*
-                * Separate multiple job files by a stonewall
-                */
-               if (!global && stonewall) {
-                       td->o.stonewall = stonewall;
-                       stonewall = 0;
+                       num_opts = 0;
+                       memset(opts, 0, alloc_opts * sizeof(char *));
                }
-
-               num_opts = 0;
-               memset(opts, 0, alloc_opts * sizeof(char *));
+               else
+                       skip_fgets = 1;
 
                while (1) {
-                       if (is_buf)
-                               p = strsep(&file, "\n");
+                       if (!skip_fgets) {
+                               if (is_buf)
+                                       p = strsep(&file, "\n");
+                               else
+                                       p = fgets(string, 4096, f);
+                               if (!p)
+                                       break;
+                               dprint(FD_PARSE, "%s", p);
+                       }
                        else
-                               p = fgets(string, 4096, f);
-                       if (!p)
-                               break;
+                               skip_fgets = 0;
 
                        if (is_empty_or_comment(p))
                                continue;
@@ -1519,12 +1548,30 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
                         * fgets() a new line at the top.
                         */
                        if (p[0] == '[') {
+                               if (nested) {
+                                       log_err("No new sections in included files\n");
+                                       return 1;
+                               }
+
                                skip_fgets = 1;
                                break;
                        }
 
                        strip_blank_end(p);
 
+                       if (!strncmp(p, "include", strlen("include"))) {
+                               char *filename = p + strlen("include") + 1;
+
+                               if ((ret = __parse_jobs_ini(td, filename,
+                                               is_buf, stonewall_flag, type, 1,
+                                               name, &opts, &alloc_opts, &num_opts))) {
+                                       log_err("Error %d while parsing include file %s\n",
+                                               ret, filename);
+                                       break;
+                               }
+                               continue;
+                       }
+
                        if (num_opts == alloc_opts) {
                                alloc_opts <<= 1;
                                opts = realloc(opts,
@@ -1535,6 +1582,13 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
                        num_opts++;
                }
 
+               if (nested) {
+                       *popts = opts;
+                       *aopts = alloc_opts;
+                       *nopts = num_opts;
+                       goto out;
+               }
+
                ret = fio_options_parse(td, opts, num_opts, dump_cmdline);
                if (!ret)
                        ret = add_job(td, name, 0, 0, type);
@@ -1557,14 +1611,22 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
                i++;
        }
 
-       free(string);
-       free(name);
        free(opts);
+out:
+       free(string);
+       if (!nested)
+               free(name);
        if (!is_buf && f != stdin)
                fclose(f);
        return ret;
 }
 
+int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
+{
+       return __parse_jobs_ini(NULL, file, is_buf, stonewall_flag, type,
+                       0, NULL, NULL, NULL, NULL);
+}
+
 static int fill_def_thread(void)
 {
        memset(&def_thread, 0, sizeof(def_thread));
diff --git a/io_u.c b/io_u.c
index eac871b..8546899 100644
--- a/io_u.c
+++ b/io_u.c
@@ -1487,7 +1487,8 @@ struct io_u *get_io_u(struct thread_data *td)
                if (io_u->ddir == DDIR_WRITE) {
                        if (td->flags & TD_F_REFILL_BUFFERS) {
                                io_u_fill_buffer(td, io_u,
-                                       io_u->xfer_buflen, io_u->xfer_buflen);
+                                       td->o.min_bs[DDIR_WRITE],
+                                       io_u->xfer_buflen);
                        } else if ((td->flags & TD_F_SCRAMBLE_BUFFERS) &&
                                   !(td->flags & TD_F_COMPRESS))
                                do_scramble = 1;
@@ -1864,22 +1865,29 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
        else if (!td->o.zero_buffers) {
                unsigned int perc = td->o.compress_percentage;
                struct frand_state *rs;
+               unsigned int left = max_bs;
 
-               rs = get_buf_state(td);
+               do {
+                       rs = get_buf_state(td);
 
-               if (perc) {
-                       unsigned int seg = min_write;
+                       min_write = min(min_write, left);
 
-                       seg = min(min_write, td->o.compress_chunk);
-                       if (!seg)
-                               seg = min_write;
+                       if (perc) {
+                               unsigned int seg = min_write;
 
-                       fill_random_buf_percentage(rs, buf, perc, seg,max_bs);
-                       save_buf_state(td, rs);
-               } else {
-                       fill_random_buf(rs, buf, max_bs);
+                               seg = min(min_write, td->o.compress_chunk);
+                               if (!seg)
+                                       seg = min_write;
+
+                               fill_random_buf_percentage(rs, buf, perc, seg,
+                                                               min_write);
+                       } else
+                               fill_random_buf(rs, buf, min_write);
+
+                       buf += min_write;
+                       left -= min_write;
                        save_buf_state(td, rs);
-               }
+               } while (left);
        } else
                memset(buf, 0, max_bs);
 }
diff --git a/lib/bloom.c b/lib/bloom.c
new file mode 100644
index 0000000..b469fde
--- /dev/null
+++ b/lib/bloom.c
@@ -0,0 +1,107 @@
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "bloom.h"
+#include "../hash.h"
+#include "../minmax.h"
+#include "../crc/xxhash.h"
+#include "../crc/crc32c.h"
+
+struct bloom {
+       uint64_t nentries;
+
+       uint32_t *map;
+};
+
+#define BITS_PER_INDEX (sizeof(uint32_t) * 8)
+#define BITS_INDEX_MASK        (BITS_PER_INDEX - 1)
+
+struct bloom_hash {
+       unsigned int seed;
+       uint32_t (*fn)(const void *, uint32_t, uint32_t);
+};
+
+static uint32_t b_crc32c(const void *buf, uint32_t len, uint32_t seed)
+{
+       return fio_crc32c(buf, len);
+}
+
+struct bloom_hash hashes[] = {
+       {
+               .seed = 0x8989,
+               .fn = jhash,
+       },
+       {
+               .seed = 0x8989,
+               .fn = XXH32,
+       },
+       {
+               .seed = 0,
+               .fn = b_crc32c,
+       },
+};
+
+#define N_HASHES       3
+
+#define MIN_ENTRIES    1073741824UL
+
+struct bloom *bloom_new(uint64_t entries)
+{
+       struct bloom *b;
+       size_t no_uints;
+
+       crc32c_intel_probe();
+
+       b = malloc(sizeof(*b));
+       b->nentries = entries;
+       no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
+       no_uints = max((unsigned long) no_uints, MIN_ENTRIES);
+       b->map = calloc(no_uints, sizeof(uint32_t));
+       if (!b->map) {
+               free(b);
+               return NULL;
+       }
+
+       return b;
+}
+
+void bloom_free(struct bloom *b)
+{
+       free(b->map);
+       free(b);
+}
+
+static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords,
+                        int set)
+{
+       uint32_t hash[N_HASHES];
+       int i, was_set;
+
+       for (i = 0; i < N_HASHES; i++) {
+               hash[i] = hashes[i].fn(data, nwords, hashes[i].seed);
+               hash[i] = hash[i] % b->nentries;
+       }
+
+       was_set = 0;
+       for (i = 0; i < N_HASHES; i++) {
+               const unsigned int index = hash[i] / BITS_PER_INDEX;
+               const unsigned int bit = hash[i] & BITS_INDEX_MASK;
+
+               if (b->map[index] & (1U << bit))
+                       was_set++;
+               if (set)
+                       b->map[index] |= 1U << bit;
+       }
+
+       return was_set == N_HASHES;
+}
+
+int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords)
+{
+       return __bloom_check(b, data, nwords, 0);
+}
+
+int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
+{
+       return __bloom_check(b, data, nwords, 1);
+}
diff --git a/lib/bloom.h b/lib/bloom.h
new file mode 100644
index 0000000..b3cde95
--- /dev/null
+++ b/lib/bloom.h
@@ -0,0 +1,13 @@
+#ifndef FIO_BLOOM_H
+#define FIO_BLOOM_H
+
+#include <inttypes.h>
+
+struct bloom;
+
+struct bloom *bloom_new(uint64_t entries);
+void bloom_free(struct bloom *b);
+int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords);
+int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords);
+
+#endif
diff --git a/t/debug.c b/t/debug.c
new file mode 100644
index 0000000..c297d61
--- /dev/null
+++ b/t/debug.c
@@ -0,0 +1,14 @@
+#include <stdio.h>
+
+FILE *f_err;
+struct timeval *fio_tv = NULL;
+unsigned int fio_debug = 0;
+
+void __dprint(int type, const char *str, ...)
+{
+}
+
+void debug_init(void)
+{
+       f_err = stderr;
+}
diff --git a/t/debug.h b/t/debug.h
new file mode 100644
index 0000000..9d1d415
--- /dev/null
+++ b/t/debug.h
@@ -0,0 +1,6 @@
+#ifndef FIO_DEBUG_INC_H
+#define FIO_DEBUG_INC_H
+
+extern void debug_init(void);
+
+#endif
diff --git a/t/dedupe.c b/t/dedupe.c
index b81e98a..5998138 100644
--- a/t/dedupe.c
+++ b/t/dedupe.c
@@ -27,13 +27,8 @@
 #include "../gettime.h"
 #include "../fio_time.h"
 
-FILE *f_err;
-struct timeval *fio_tv = NULL;
-unsigned int fio_debug = 0;
-
-void __dprint(int type, const char *str, ...)
-{
-}
+#include "../lib/bloom.h"
+#include "debug.h"
 
 struct worker_thread {
        pthread_t thread;
@@ -45,6 +40,7 @@ struct worker_thread {
        uint64_t size;
 
        unsigned long items;
+       unsigned long dupes;
        int err;
 };
 
@@ -66,6 +62,7 @@ struct item {
 };
 
 static struct rb_root rb_root;
+static struct bloom *bloom;
 static struct fio_mutex *rb_lock;
 
 static unsigned int blocksize = 4096;
@@ -75,6 +72,7 @@ static unsigned int dump_output;
 static unsigned int odirect;
 static unsigned int collision_check;
 static unsigned int print_progress = 1;
+static unsigned int use_bloom = 1;
 
 static uint64_t total_size;
 static uint64_t cur_offset;
@@ -116,17 +114,17 @@ static int get_work(uint64_t *offset, uint64_t *size)
        return ret;
 }
 
-static int read_block(int fd, void *buf, off_t offset)
+static int __read_block(int fd, void *buf, off_t offset, size_t count)
 {
        ssize_t ret;
 
-       ret = pread(fd, buf, blocksize, offset);
+       ret = pread(fd, buf, count, offset);
        if (ret < 0) {
                perror("pread");
                return 1;
        } else if (!ret)
                return 1;
-       else if (ret != blocksize) {
+       else if (ret != count) {
                log_err("dedupe: short read on block\n");
                return 1;
        }
@@ -134,6 +132,11 @@ static int read_block(int fd, void *buf, off_t offset)
        return 0;
 }
 
+static int read_block(int fd, void *buf, off_t offset)
+{
+       return __read_block(fd, buf, offset, blocksize);
+}
+
 static void add_item(struct chunk *c, struct item *i)
 {
        /*      
@@ -231,14 +234,24 @@ add:
        add_item(c, i);
 }
 
-static void insert_chunks(struct item *items, unsigned int nitems)
+static void insert_chunks(struct item *items, unsigned int nitems,
+                         uint64_t *ndupes)
 {
        int i;
 
        fio_mutex_down(rb_lock);
 
-       for (i = 0; i < nitems; i++)
-               insert_chunk(&items[i]);
+       for (i = 0; i < nitems; i++) {
+               if (bloom) {
+                       unsigned int s;
+                       int r;
+
+                       s = sizeof(items[i].hash) / sizeof(uint32_t);
+                       r = bloom_set(bloom, items[i].hash, s);
+                       *ndupes += r;
+               } else
+                       insert_chunk(&items[i]);
+       }
 
        fio_mutex_up(rb_lock);
 }
@@ -252,30 +265,46 @@ static void crc_buf(void *buf, uint32_t *hash)
        fio_md5_final(&ctx);
 }
 
+static unsigned int read_blocks(int fd, void *buf, off_t offset, size_t size)
+{
+       if (__read_block(fd, buf, offset, size))
+               return 0;
+
+       return size / blocksize;
+}
+
 static int do_work(struct worker_thread *thread, void *buf)
 {
        unsigned int nblocks, i;
        off_t offset;
-       int err = 0, nitems = 0;
+       int nitems = 0;
+       uint64_t ndupes = 0;
        struct item *items;
 
-       nblocks = thread->size / blocksize;
        offset = thread->cur_offset;
+
+       nblocks = read_blocks(thread->fd, buf, offset, min(thread->size, (uint64_t)chunk_size));
+       if (!nblocks)
+               return 1;
+
        items = malloc(sizeof(*items) * nblocks);
 
        for (i = 0; i < nblocks; i++) {
-               if (read_block(thread->fd, buf, offset))
-                       break;
-               items[i].offset = offset;
-               crc_buf(buf, items[i].hash);
+               void *thisptr = buf + (i * blocksize);
+
+               if (items)
+                       items[i].offset = offset;
+               crc_buf(thisptr, items[i].hash);
                offset += blocksize;
                nitems++;
        }
 
-       insert_chunks(items, nitems);
-       thread->items += nitems;
+       insert_chunks(items, nitems, &ndupes);
+
        free(items);
-       return err;
+       thread->items += nitems;
+       thread->dupes += ndupes;
+       return 0;
 }
 
 static void *thread_fn(void *data)
@@ -283,7 +312,7 @@ static void *thread_fn(void *data)
        struct worker_thread *thread = data;
        void *buf;
 
-       buf = fio_memalign(blocksize, blocksize);
+       buf = fio_memalign(blocksize, chunk_size);
 
        do {
                if (get_work(&thread->cur_offset, &thread->size)) {
@@ -297,7 +326,7 @@ static void *thread_fn(void *data)
        } while (1);
 
        thread->done = 1;
-       fio_memfree(buf, blocksize);
+       fio_memfree(buf, chunk_size);
        return NULL;
 }
 
@@ -343,7 +372,8 @@ static void show_progress(struct worker_thread *threads, unsigned long total)
        };
 }
 
-static int run_dedupe_threads(int fd, uint64_t dev_size)
+static int run_dedupe_threads(int fd, uint64_t dev_size, uint64_t *nextents,
+                               uint64_t *nchunks)
 {
        struct worker_thread *threads;
        unsigned long nitems, total_items;
@@ -371,20 +401,27 @@ static int run_dedupe_threads(int fd, uint64_t dev_size)
        show_progress(threads, total_items);
 
        nitems = 0;
+       *nextents = 0;
+       *nchunks = 1;
        for (i = 0; i < num_threads; i++) {
                void *ret;
                pthread_join(threads[i].thread, &ret);
                nitems += threads[i].items;
+               *nchunks += threads[i].dupes;
        }
 
        printf("Threads(%u): %lu items processed\n", num_threads, nitems);
 
+       *nextents = nitems;
+       *nchunks = nitems - *nchunks;
+
        fio_mutex_remove(size_lock);
        free(threads);
        return err;
 }
 
-static int dedupe_check(const char *filename)
+static int dedupe_check(const char *filename, uint64_t *nextents,
+                       uint64_t *nchunks)
 {
        uint64_t dev_size;
        struct stat sb;
@@ -412,9 +449,16 @@ static int dedupe_check(const char *filename)
                return 1;
        }
 
+       if (use_bloom) {
+               uint64_t bloom_entries;
+
+               bloom_entries = (3 * dev_size ) / (blocksize * 2);
+               bloom = bloom_new(bloom_entries);
+       }
+
        printf("Will check <%s>, size <%llu>, using %u threads\n", filename, (unsigned long long) dev_size, num_threads);
 
-       return run_dedupe_threads(dev_fd, dev_size);
+       return run_dedupe_threads(dev_fd, dev_size, nextents, nchunks);
 }
 
 static void show_chunk(struct chunk *c)
@@ -429,14 +473,24 @@ static void show_chunk(struct chunk *c)
        }
 }
 
-static void iter_rb_tree(void)
+static void show_stat(uint64_t nextents, uint64_t nchunks)
 {
-       struct rb_node *n;
-       uint64_t nchunks;
-       uint64_t nextents;
        double perc;
 
-       nchunks = nextents = 0;
+       printf("Extents=%lu, Unique extents=%lu\n", (unsigned long) nextents, (unsigned long) nchunks);
+       printf("De-dupe factor: %3.2f\n", (double) nextents / (double) nchunks);
+
+       perc = 1.00 - ((double) nchunks / (double) nextents);
+       perc *= 100.0;
+       printf("Fio setting: dedupe_percentage=%u\n", (int) (perc + 0.50));
+
+}
+
+static void iter_rb_tree(uint64_t *nextents, uint64_t *nchunks)
+{
+       struct rb_node *n;
+
+       *nchunks = *nextents = 0;
 
        n = rb_first(&rb_root);
        if (!n)
@@ -446,20 +500,13 @@ static void iter_rb_tree(void)
                struct chunk *c;
 
                c = rb_entry(n, struct chunk, rb_node);
-               nchunks++;
-               nextents += c->count;
+               (*nchunks)++;
+               *nextents += c->count;
 
                if (dump_output)
                        show_chunk(c);
 
        } while ((n = rb_next(n)) != NULL);
-
-       printf("Extents=%lu, Unique extents=%lu\n", (unsigned long) nextents, (unsigned long) nchunks);
-       printf("De-dupe factor: %3.2f\n", (double) nextents / (double) nchunks);
-
-       perc = 1.00 - ((double) nchunks / (double) nextents);
-       perc *= 100.0;
-       printf("Fio setting: dedupe_percentage=%u\n", (int) (perc + 0.50));
 }
 
 static int usage(char *argv[])
@@ -471,15 +518,19 @@ static int usage(char *argv[])
        log_err("\t-d\tFull extent/chunk debug output\n");
        log_err("\t-o\tUse O_DIRECT\n");
        log_err("\t-c\tFull collision check\n");
+       log_err("\t-B\tUse probabilistic bloom filter\n");
        log_err("\t-p\tPrint progress indicator\n");
        return 1;
 }
 
 int main(int argc, char *argv[])
 {
+       uint64_t nextents = 0, nchunks = 0;
        int c, ret;
 
-       while ((c = getopt(argc, argv, "b:t:d:o:c:p:")) != -1) {
+       debug_init();
+
+       while ((c = getopt(argc, argv, "b:t:d:o:c:p:B:")) != -1) {
                switch (c) {
                case 'b':
                        blocksize = atoi(optarg);
@@ -499,12 +550,18 @@ int main(int argc, char *argv[])
                case 'p':
                        print_progress = atoi(optarg);
                        break;
+               case 'B':
+                       use_bloom = atoi(optarg);
+                       break;
                case '?':
                default:
                        return usage(argv);
                }
        }
 
+       if (collision_check || dump_output)
+               use_bloom = 0;
+
        if (!num_threads)
                num_threads = cpus_online();
 
@@ -516,11 +573,16 @@ int main(int argc, char *argv[])
        rb_root = RB_ROOT;
        rb_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
 
-       ret = dedupe_check(argv[optind]);
+       ret = dedupe_check(argv[optind], &nextents, &nchunks);
+
+       if (!bloom)
+               iter_rb_tree(&nextents, &nchunks);
 
-       iter_rb_tree();
+       show_stat(nextents, nchunks);
 
        fio_mutex_remove(rb_lock);
+       if (bloom)
+               bloom_free(bloom);
        scleanup();
        return ret;
 }
diff --git a/t/lfsr-test.c b/t/lfsr-test.c
index 481f37e..4b54248 100644
--- a/t/lfsr-test.c
+++ b/t/lfsr-test.c
@@ -8,6 +8,8 @@
 #include <sys/stat.h>
 
 #include "../lib/lfsr.h"
+#include "../gettime.h"
+#include "../fio_time.h"
 
 void usage()
 {
@@ -25,7 +27,7 @@ void usage()
 int main(int argc, char *argv[])
 {
        int r;
-       struct timespec start, end;
+       struct timeval start, end;
        struct fio_lfsr *fl;
        int verify = 0;
        unsigned int spin = 0;
@@ -86,12 +88,12 @@ int main(int argc, char *argv[])
         * negligible overhead.
         */
        fprintf(stderr, "\nTest initiated... ");
-       clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
+       fio_gettime(&start, NULL);
        while (!lfsr_next(fl, &i, fl->max_val)) {
                if (verify)
                        *(uint8_t *)(v + i) += 1;
        }
-       clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
+       fio_gettime(&end, NULL);
        fprintf(stderr, "finished.\n");
 
 
@@ -113,8 +115,7 @@ int main(int argc, char *argv[])
        }
 
        /* Calculate elapsed time and mean time per number */
-       total = (end.tv_sec - start.tv_sec) * pow(10,9) +
-               end.tv_nsec - start.tv_nsec;
+       total = utime_since(&start, &end);
        mean = total / fl->num_vals;
 
        printf("\nTime results ");
@@ -122,7 +123,7 @@ int main(int argc, char *argv[])
                printf("(slower due to verification)");
        printf("\n==============================\n");
        printf("Elapsed: %lf s\n", total / pow(10,9));
-       printf("Mean:    %lf ns\n", mean);
+       printf("Mean:    %lf us\n", mean);
 
        free(v_start);
        free(fl);
diff --git a/t/stest.c b/t/stest.c
index 0da8f2c..efb256e 100644
--- a/t/stest.c
+++ b/t/stest.c
@@ -4,10 +4,7 @@
 
 #include "../smalloc.h"
 #include "../flist.h"
-
-FILE *f_err;
-struct timeval *fio_tv = NULL;
-unsigned int fio_debug = 0;
+#include "debug.h"
 
 #define MAGIC1 0xa9b1c8d2
 #define MAGIC2 0xf0a1e9b3
@@ -72,9 +69,8 @@ static int do_specific_alloc(unsigned long size)
 
 int main(int argc, char *argv[])
 {
-       f_err = stderr;
-
        sinit();
+       debug_init();
 
        do_rand_allocs();
 
@@ -84,7 +80,3 @@ int main(int argc, char *argv[])
        scleanup();
        return 0;
 }
-
-void __dprint(int type, const char *str, ...)
-{
-}
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to