On Tue, 25 Feb 2020 11:41:45 -0500 (EST)
Mikulas Patocka <[email protected]> wrote:

> On Thu, 20 Feb 2020, Lukas Straub wrote:
>
> > If a full tag area is being written, don't read it first. This prevents a
> > read-modify-write cycle and increases performance on HDDs considerably.
> >
> > To do this we now calculate the checksums for all sectors in the bio in one
> > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > which now checks if we overwrite the whole tag area.
> >
> > Benchmarking with a 5400RPM HDD with bitmap mode:
> > integritysetup format --no-wipe --batch-mode --interleave-sectors 
> > $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > integritysetup open -I crc32c -B /dev/sdc hdda_integ
> > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) 
> > conv=fsync oflag=direct status=progress
> >
> > Without patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> >
> > With patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> >
> > Signed-off-by: Lukas Straub <[email protected]>
> > ---
> >  drivers/md/dm-integrity.c | 80 ++++++++++++++++++++++-----------------
> >  1 file changed, 46 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index b225b3e445fa..0e5ddcf44935 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -1309,9 +1309,16 @@ static int dm_integrity_rw_tag(struct dm_integrity_c 
> > *ic, unsigned char *tag, se
> >             if (unlikely(r))
> >                     return r;
> >
> > -           data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > -           if (IS_ERR(data))
> > -                   return PTR_ERR(data);
> > +           /* Don't read tag area from disk if we're going to overwrite it 
> > completely */
> > +           if (op == TAG_WRITE && *metadata_offset == 0 && total_size >= 
> > ic->metadata_run) {
>
> Hi
>
> This is incorrect logic because ic->metadata_run is in the units of
> 512-byte sectors and total_size is in bytes.
>
> If I correct the bug and change it to "if (op == TAG_WRITE &&
> *metadata_offset == 0 && total_size >= ic->metadata_run << SECTOR_SHIFT)",
> then the benchmark doesn't show any performance advantage at all.

Uh oh, looking at it again, I have mixed up sectors and bytes elsewhere too.
Actually, could we rewrite this check as
 total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)?
This should work, right?
Then we only have to overwrite part of the tag area, as long as it's whole
sectors.

> You would need much bigger bios to take advantage for this - for example,
> if we have 4k block size and 64k metadata buffer size and 4-byte crc32,
> there are 65536/4=16384 tags in one metadata buffer and we would need
> 16384*4096=64MiB bio to completely overwrite the metadata buffer. Such big
> bios are not realistic.

What prevents us from using a single sector as the tag area? (That was my
assumption with this patch.)
Then we would have (with 512-byte sectors) 512/4 = 128 tags = 64k bio, which
is still below the optimal write size of raid5/6.
I just tried to accomplish this, but there seems to be a minimum limit on
interleave_sectors.

Regards,
Lukas Straub

> Mikulas
>
>
> > +                   data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> > +                   if (IS_ERR(data))
> > +                           return PTR_ERR(data);
> > +           } else {
> > +                   data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > +                   if (IS_ERR(data))
> > +                           return PTR_ERR(data);
> > +           }
> >
> >             to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 
> > *metadata_offset, total_size);
> >             dp = data + *metadata_offset;
> > @@ -1514,6 +1521,8 @@ static void integrity_metadata(struct work_struct *w)
> >  {
> >     struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, 
> > work);
> >     struct dm_integrity_c *ic = dio->ic;
> > +   unsigned sectors_to_process = dio->range.n_sectors;
> > +   sector_t sector = dio->range.logical_sector;
> >
> >     int r;
> >
> > @@ -1522,16 +1531,14 @@ static void integrity_metadata(struct work_struct 
> > *w)
> >             struct bio_vec bv;
> >             unsigned digest_size = 
> > crypto_shash_digestsize(ic->internal_hash);
> >             struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct 
> > dm_integrity_io));
> > -           char *checksums;
> > +           char *checksums, *checksums_ptr;
> >             unsigned extra_space = unlikely(digest_size > ic->tag_size) ? 
> > digest_size - ic->tag_size : 0;
> >             char checksums_onstack[HASH_MAX_DIGESTSIZE];
> > -           unsigned sectors_to_process = dio->range.n_sectors;
> > -           sector_t sector = dio->range.logical_sector;
> >
> >             if (unlikely(ic->mode == 'R'))
> >                     goto skip_io;
> >
> > -           checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> 
> > ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > +           checksums = kmalloc((dio->range.n_sectors >> 
> > ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> >                                 GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> >             if (!checksums) {
> >                     checksums = checksums_onstack;
> > @@ -1542,49 +1549,45 @@ static void integrity_metadata(struct work_struct 
> > *w)
> >                     }
> >             }
> >
> > +           checksums_ptr = checksums;
> >             __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> >                     unsigned pos;
> > -                   char *mem, *checksums_ptr;
> > -
> > -again:
> > +                   char *mem;
> >                     mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> >                     pos = 0;
> > -                   checksums_ptr = checksums;
> >                     do {
> >                             integrity_sector_checksum(ic, sector, mem + 
> > pos, checksums_ptr);
> > -                           checksums_ptr += ic->tag_size;
> > -                           sectors_to_process -= ic->sectors_per_block;
> > +
> > +                           if (likely(checksums != checksums_onstack)) {
> > +                                   checksums_ptr += ic->tag_size;
> > +                           } else {
> > +                                   r = dm_integrity_rw_tag(ic, checksums, 
> > &dio->metadata_block, &dio->metadata_offset,
> > +                                                           ic->tag_size, 
> > !dio->write ? TAG_CMP : TAG_WRITE);
> > +                                   if (unlikely(r))
> > +                                           goto internal_hash_error;
> > +                           }
> > +
> >                             pos += ic->sectors_per_block << SECTOR_SHIFT;
> >                             sector += ic->sectors_per_block;
> > -                   } while (pos < bv.bv_len && sectors_to_process && 
> > checksums != checksums_onstack);
> > +                           sectors_to_process -= ic->sectors_per_block;
> > +                   } while (pos < bv.bv_len && sectors_to_process);
> >                     kunmap_atomic(mem);
> >
> > -                   r = dm_integrity_rw_tag(ic, checksums, 
> > &dio->metadata_block, &dio->metadata_offset,
> > -                                           checksums_ptr - checksums, 
> > !dio->write ? TAG_CMP : TAG_WRITE);
> > -                   if (unlikely(r)) {
> > -                           if (r > 0) {
> > -                                   DMERR_LIMIT("Checksum failed at sector 
> > 0x%llx",
> > -                                               (unsigned long long)(sector 
> > - ((r + ic->tag_size - 1) / ic->tag_size)));
> > -                                   r = -EILSEQ;
> > -                                   atomic64_inc(&ic->number_of_mismatches);
> > -                           }
> > -                           if (likely(checksums != checksums_onstack))
> > -                                   kfree(checksums);
> > -                           goto error;
> > -                   }
> > -
> >                     if (!sectors_to_process)
> >                             break;
> > +           }
> >
> > -                   if (unlikely(pos < bv.bv_len)) {
> > -                           bv.bv_offset += pos;
> > -                           bv.bv_len -= pos;
> > -                           goto again;
> > +           if (likely(checksums != checksums_onstack)) {
> > +                   r = dm_integrity_rw_tag(ic, checksums, 
> > &dio->metadata_block, &dio->metadata_offset,
> > +                                           (dio->range.n_sectors >> 
> > ic->sb->log2_sectors_per_block) * ic->tag_size,
> > +                                           !dio->write ? TAG_CMP : 
> > TAG_WRITE);
> > +                   if (unlikely(r)) {
> > +                           kfree(checksums);
> > +                           goto internal_hash_error;
> >                     }
> > +                   kfree(checksums);
> >             }
> >
> > -           if (likely(checksums != checksums_onstack))
> > -                   kfree(checksums);
> >     } else {
> >             struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> >
> > @@ -1615,6 +1618,13 @@ static void integrity_metadata(struct work_struct *w)
> >  skip_io:
> >     dec_in_flight(dio);
> >     return;
> > +internal_hash_error:
> > +   if (r > 0) {
> > +           DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > +                           (unsigned long long)(sector - ((r + 
> > ic->tag_size - 1) / ic->tag_size)));
> > +           r = -EILSEQ;
> > +           atomic64_inc(&ic->number_of_mismatches);
> > +   }
> >  error:
> >     dio->bi_status = errno_to_blk_status(r);
> >     dec_in_flight(dio);
> > @@ -3019,6 +3029,8 @@ static void dm_integrity_io_hints(struct dm_target 
> > *ti, struct queue_limits *lim
> >             limits->physical_block_size = ic->sectors_per_block << 
> > SECTOR_SHIFT;
> >             blk_limits_io_min(limits, ic->sectors_per_block << 
> > SECTOR_SHIFT);
> >     }
> > +
> > +   blk_limits_io_opt(limits, (1U << ic->sb->log2_interleave_sectors));
> >  }
> >
> >  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> > --
> > 2.20.1
> >
> >
> > --
> > dm-devel mailing list
> > [email protected]
> > https://www.redhat.com/mailman/listinfo/dm-devel
> >
>


--
dm-devel mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/dm-devel

Reply via email to