Hello all,
Here is a patch which allows to skip intermediate data copying between the bio
requested to write and the disk cache in sh if the full-stripe write
operation is
on the way.
This improves the performance of write operations for some dedicated cases
when big chunks of data are being sequentially written to RAID array, but in
general eliminating disk cache slows the performance down.
The performance results obtained on the ppc440spe-based board using the
PPC440SPE ADMA driver, Xdd benchmark, and the RAID-5 of 4 disks are as
follows:
SKIP_BIO_SET = 'N': 40 MBps;
SKIP_BIO_SET = 'Y': 70 MBps.
The actual test commands used:
# mdadm -C /dev/md0 -c 16 -l 5 -n 4 /dev/sd[a-d]
# xdd -op write -kbytes 48000 -reqsize 48 -dio -syncwrite -passes 3 -verbose
-target /dev/md0
Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED]
Signed-off-by: Mikhail Cherkashin [EMAIL PROTECTED]
--
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 9b6fbf0..7a709aa 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -137,6 +137,18 @@ config MD_RAID456
If unsure, say Y.
+config MD_RAID_SKIP_BIO_COPY
+ bool Skip intermediate bio-cache copy
+ depends on MD_RAID456
+ default n
+ ---help---
+ Skip intermediate data copying between the bio requested to write and
+ the disk cache in sh if the full-stripe write operation is on the
way.
+ This might improve the performance of write operations in some
dedicated
+ cases but generally eliminating disk cache slows the performance down.
+
+ If unsure, say N.
+
config MD_RAID5_RESHAPE
bool Support adding drives to a raid-5 array
depends on MD_RAID456
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9a4959a..904b27e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -996,6 +996,9 @@ ops_run_biodrain(struct stripe_head *sh, struct
dma_async_tx_descriptor *tx,
struct stripe_queue *sq = sh-sq;
int disks = sq-disks;
int pd_idx = sq-pd_idx;
+#ifdef CONFIG_MD_RAID_SKIP_BIO_COPY
+ int fswrite = 1;
+#endif
int i;
/* check if prexor is active which means only process blocks
@@ -1006,6 +1009,70 @@ ops_run_biodrain(struct stripe_head *sh, struct
dma_async_tx_descriptor *tx,
pr_debug(%s: stripe %llu\n, __FUNCTION__,
(unsigned long long)sh-sector);
+#ifdef CONFIG_MD_RAID_SKIP_BIO_COPY
+ /* initially assume that the operation is a full-stripe write*/
+ for (i = disks; i-- ;) {
+ struct r5dev *dev;
+
+ if (unlikely(i == pd_idx))
+ continue;
+ if (unlikely(!sq-dev[i].towrite || prexor))
+ goto do_copy;
+ dev = sh-dev[i];
+ if ((test_bit(R5_OVERWRITE, dev-flags))
+ !r5_next_bio(sq-dev[i].towrite, sq-dev[i].sector)) {
+ /* now check if there is only one bio_vec within
+* the bio covers the sh-dev[i]
+*/
+ struct bio *pbio = sq-dev[i].towrite;
+ struct bio_vec *bvl;
+ int found = 0;
+ int bvec_page = pbio-bi_sector 9, k;
+ int dev_page = sq-dev[i].sector 9;
+
+ /* search for the bio_vec that covers dev[i].page */
+ bio_for_each_segment(bvl, pbio, k) {
+ if (bvec_page == dev_page
+ bio_iovec_idx(pbio,k)-bv_len ==
+ STRIPE_SIZE) {
+ /* found the vector which covers the
+* strip fully
+*/
+ found = 1;
+ break;
+ }
+ bvec_page += bio_iovec_idx(pbio,k)-bv_len;
+ }
+
+ if (found) {
+ /* save the direct pointer to buffer */
+ BUG_ON(dev-dpage);
+ dev-dpage = bio_iovec_idx(pbio,k)-bv_page;
+ continue;
+ }
+ }
+
+do_copy:
+ /* come here in two cases:
+* - the dev[i] is not covered fully with the bio;
+* - there are more than one bios cover the dev[i].
+* in both cases do copy from bio to dev[i].page
+*/
+ pr_debug(%s: do copy because of disk %d\n, __FUNCTION__, i);
+ do {
+ /* restore dpages set */
+ sh-dev[i].dpage = NULL;
+ } while (i++ != disks);
+ fswrite = 0;
+ break;
+ }
+
+ if (fswrite) {
+