When dm-writecache is used with an SSD as a cache device, it submits a
separate bio for each written block. The I/Os are later merged by the disk
scheduler, but this merging degrades performance.

This patch makes dm-writecache submit larger bios - a bio may cover several
blocks as long as there is consecutive free space on the cache device.
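
In rough pseudocode (a simplified sketch of the SSD write path - variable
names shortened, metadata updates and locking omitted; the full logic is in
the patch below): keep claiming blocks from the freelist while the next free
block is physically adjacent to the last one claimed, then accept that many
sectors of the bio in a single submission.

	unsigned bio_size = wc->block_size;
	sector_t start_sec = cache_sector(wc, e);
	sector_t next_sec = start_sec + (bio_size >> SECTOR_SHIFT);

	/* grow the I/O while the free blocks stay physically contiguous */
	while (bio_size < bio->bi_iter.bi_size) {
		struct wc_entry *f = writecache_pop_from_freelist(wc, next_sec);
		if (!f)
			break;	/* next free block is not adjacent - stop growing */
		bio_size += wc->block_size;
		next_sec += wc->block_size >> SECTOR_SHIFT;
	}

	bio->bi_iter.bi_sector = start_sec;
	dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);	/* one large bio */

writecache_pop_from_freelist() now takes the expected sector and returns NULL
when the next free block does not start there, which is what bounds the merge.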

Benchmark (arm64 with 64k page size, using /dev/ram0 as a cache device):

fio --bs=512k --iodepth=32 --size=400M --direct=1 --filename=/dev/mapper/cache \
    --rw=randwrite --numjobs=1 --name=test

block   old     new
size    MiB/s   MiB/s
---------------------
512     181     700
1k      347     1256
2k      644     2020
4k      1183    2759
8k      1852    3333
16k     2469    3509
32k     2974    3670
64k     3404    3810

Signed-off-by: Mikulas Patocka <[email protected]>

---
 drivers/md/dm-writecache.c |   28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

Index: linux-2.6/drivers/md/dm-writecache.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-writecache.c   2020-01-14 16:11:09.000000000 +0100
+++ linux-2.6/drivers/md/dm-writecache.c        2020-01-14 21:42:44.000000000 +0100
@@ -626,7 +626,7 @@ static void writecache_add_to_freelist(s
        wc->freelist_size++;
 }
 
-static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc)
+static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector)
 {
        struct wc_entry *e;
 
@@ -635,6 +635,8 @@ static struct wc_entry *writecache_pop_f
                if (unlikely(!wc->current_free))
                        return NULL;
                e = wc->current_free;
+               if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector))
+                       return NULL;
                next = rb_next(&e->rb_node);
                rb_erase(&e->rb_node, &wc->freetree);
                if (unlikely(!next))
@@ -644,6 +646,8 @@ static struct wc_entry *writecache_pop_f
                if (unlikely(list_empty(&wc->freelist)))
                        return NULL;
                e = container_of(wc->freelist.next, struct wc_entry, lru);
+               if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector))
+                       return NULL;
                list_del(&e->lru);
        }
        wc->freelist_size--;
@@ -1194,7 +1198,7 @@ read_next_block:
                                        goto bio_copy;
                                }
                        }
-                       e = writecache_pop_from_freelist(wc);
+                       e = writecache_pop_from_freelist(wc, (sector_t)-1);
                        if (unlikely(!e)) {
                                writecache_wait_on_freelist(wc);
                                continue;
@@ -1206,9 +1210,25 @@ bio_copy:
                        if (WC_MODE_PMEM(wc)) {
                                bio_copy_block(wc, bio, memory_data(wc, e));
                        } else {
-                               dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
+                               unsigned bio_size = wc->block_size;
+                               sector_t start_cache_sec = cache_sector(wc, e);
+                               sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
+
+                               while (bio_size < bio->bi_iter.bi_size) {
+                                       struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
+                                       if (!f)
+                                               break;
+                                       write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + (bio_size >> SECTOR_SHIFT), wc->seq_count);
+                                       writecache_insert_entry(wc, f);
+                                       wc->uncommitted_blocks++;
+                                       bio_size += wc->block_size;
+                                       current_cache_sec += wc->block_size >> SECTOR_SHIFT;
+                               }
+
                                bio_set_dev(bio, wc->ssd_dev->bdev);
-                               bio->bi_iter.bi_sector = cache_sector(wc, e);
+                               bio->bi_iter.bi_sector = start_cache_sec;
+                               dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+
                                if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
                                        wc->uncommitted_blocks = 0;
                                        queue_work(wc->writeback_wq, &wc->flush_work);
