Hello Sergey,

Sorry for the late response. I am in long vavacation now but today,
I get small time to sit down on computer. :)

On Mon, Dec 14, 2015 at 09:38:55PM +0900, Sergey Senozhatsky wrote:
> Hello,
> 
> I've been thinking about this for some time, but didn't have a chance
> to properly investigate so far. My question is: why do we even bother
> with partial IO in zram?

It was done before I involved zram actively so I should spend a time
to search the reason.
Firstly, author was Jerome.
http://lists.openwall.net/linux-kernel/2011/06/10/318

And Nitin wanted to increase logical block size 64K instead of
making complex part by partial I/O.
http://lists.openwall.net/linux-kernel/2011/06/10/402

And Jeff and Martin said there is no problem to increase
logical_block_size from unsigned short if people are aware of
the implications bigger blocks have on the filesystems they put on top.

http://lists.openwall.net/linux-kernel/2011/06/14/289
http://lists.openwall.net/linux-kernel/2011/06/14/324

Jerome finally found severe problem which FAT fs are unable to
cope with 64K logical blocks at least.
http://lists.openwall.net/linux-kernel/2011/07/01/196

That's why Nitin decided to suppport partial IO in zram.
And I think it does make sense.

Thanks.

> 
> We set zram->disk->queue->limits.discard_granularity to PAGE_SIZE,
> just like the rest of queue configuration, e.g.
>       blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE)
> etc.
> 
> And it's only blk_queue_logical_block_size() that has !PAGE_SIZE
> configuration (on systems where PAGE_SIZE != ZRAM_LOGICAL_BLOCK_SHIFT).
> 
> 
> The only requirement enforced on blk_queue_logical_block_size() from
> zram/zsmalloc side seems to be that it should be less than or equal
> to ZS_MAX_ALLOC_SIZE, which is:
>       #define ZS_MAX_ALLOC_SIZE       PAGE_SIZE
> 
> 
> We talk to compressing backends with PAGE_SIZE-d buffers,
> zsmalloc understands PAGE_SIZE allocations and we can simplify
> things if we will be able to drop this partial IO support thing.
> So why do we keep it?
> 
> What am I missing? Sorry if there is something obvious.
> 
> 
> ** NOT TESTED ON SYSTEMS WITH PAGE_SIZE OTHER THAN 4K **
> Will try to do this later this week (well, if it makes sense).
> 
> 
> ===8<===8<===
> 
> From e55739c453c1ab01ab9b6ef734b078c1d96fbde2 Mon Sep 17 00:00:00 2001
> From: Sergey Senozhatsky <[email protected]>
> Date: Mon, 14 Dec 2015 21:23:39 +0900
> Subject: [PATCH] zram: drop partial_io handling
> 
> Not-yet-Signed-off-by: Sergey Senozhatsky <[email protected]>
> ---
>  drivers/block/zram/zram_drv.c | 99 
> ++++++-------------------------------------
>  drivers/block/zram/zram_drv.h |  3 +-
>  2 files changed, 14 insertions(+), 88 deletions(-)
> 
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index 47915d7..9bb5e3b 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -106,11 +106,6 @@ static void zram_set_obj_size(struct zram_meta *meta,
>       meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
>  }
>  
> -static inline bool is_partial_io(struct bio_vec *bvec)
> -{
> -     return bvec->bv_len != PAGE_SIZE;
> -}
> -
>  /*
>   * Check if request is within bounds and aligned on zram logical blocks.
>   */
> @@ -178,10 +173,7 @@ static void handle_zero_page(struct bio_vec *bvec)
>       void *user_mem;
>  
>       user_mem = kmap_atomic(page);
> -     if (is_partial_io(bvec))
> -             memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
> -     else
> -             clear_page(user_mem);
> +     clear_page(user_mem);
>       kunmap_atomic(user_mem);
>  
>       flush_dcache_page(page);
> @@ -600,7 +592,7 @@ static int zram_bvec_read(struct zram *zram, struct 
> bio_vec *bvec,
>  {
>       int ret;
>       struct page *page;
> -     unsigned char *user_mem, *uncmem = NULL;
> +     unsigned char *uncmem;
>       struct zram_meta *meta = zram->meta;
>       page = bvec->bv_page;
>  
> @@ -613,35 +605,16 @@ static int zram_bvec_read(struct zram *zram, struct 
> bio_vec *bvec,
>       }
>       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
>  
> -     if (is_partial_io(bvec))
> -             /* Use  a temporary buffer to decompress the page */
> -             uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
> -
> -     user_mem = kmap_atomic(page);
> -     if (!is_partial_io(bvec))
> -             uncmem = user_mem;
> -
> -     if (!uncmem) {
> -             pr_err("Unable to allocate temp memory\n");
> -             ret = -ENOMEM;
> -             goto out_cleanup;
> -     }
> -
> +     uncmem = kmap_atomic(page);
>       ret = zram_decompress_page(zram, uncmem, index);
>       /* Should NEVER happen. Return bio error if it does. */
>       if (unlikely(ret))
>               goto out_cleanup;
>  
> -     if (is_partial_io(bvec))
> -             memcpy(user_mem + bvec->bv_offset, uncmem + offset,
> -                             bvec->bv_len);
> -
>       flush_dcache_page(page);
>       ret = 0;
>  out_cleanup:
> -     kunmap_atomic(user_mem);
> -     if (is_partial_io(bvec))
> -             kfree(uncmem);
> +     kunmap_atomic(uncmem);
>       return ret;
>  }
>  
> @@ -652,42 +625,17 @@ static int zram_bvec_write(struct zram *zram, struct 
> bio_vec *bvec, u32 index,
>       size_t clen;
>       unsigned long handle;
>       struct page *page;
> -     unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
> +     unsigned char *user_mem, *cmem;
>       struct zram_meta *meta = zram->meta;
>       struct zcomp_strm *zstrm = NULL;
>       unsigned long alloced_pages;
>  
>       page = bvec->bv_page;
> -     if (is_partial_io(bvec)) {
> -             /*
> -              * This is a partial IO. We need to read the full page
> -              * before to write the changes.
> -              */
> -             uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
> -             if (!uncmem) {
> -                     ret = -ENOMEM;
> -                     goto out;
> -             }
> -             ret = zram_decompress_page(zram, uncmem, index);
> -             if (ret)
> -                     goto out;
> -     }
> -
>       zstrm = zcomp_strm_find(zram->comp);
>       user_mem = kmap_atomic(page);
>  
> -     if (is_partial_io(bvec)) {
> -             memcpy(uncmem + offset, user_mem + bvec->bv_offset,
> -                    bvec->bv_len);
> +     if (page_zero_filled(user_mem)) {
>               kunmap_atomic(user_mem);
> -             user_mem = NULL;
> -     } else {
> -             uncmem = user_mem;
> -     }
> -
> -     if (page_zero_filled(uncmem)) {
> -             if (user_mem)
> -                     kunmap_atomic(user_mem);
>               /* Free memory associated with this sector now. */
>               bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
>               zram_free_page(zram, index);
> @@ -699,22 +647,15 @@ static int zram_bvec_write(struct zram *zram, struct 
> bio_vec *bvec, u32 index,
>               goto out;
>       }
>  
> -     ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
> -     if (!is_partial_io(bvec)) {
> -             kunmap_atomic(user_mem);
> -             user_mem = NULL;
> -             uncmem = NULL;
> -     }
> +     ret = zcomp_compress(zram->comp, zstrm, user_mem, &clen);
> +     kunmap_atomic(user_mem);
>  
>       if (unlikely(ret)) {
>               pr_err("Compression failed! err=%d\n", ret);
>               goto out;
>       }
> -     src = zstrm->buffer;
>       if (unlikely(clen > max_zpage_size)) {
>               clen = PAGE_SIZE;
> -             if (is_partial_io(bvec))
> -                     src = uncmem;
>       }
>  
>       handle = zs_malloc(meta->mem_pool, clen);
> @@ -736,12 +677,12 @@ static int zram_bvec_write(struct zram *zram, struct 
> bio_vec *bvec, u32 index,
>  
>       cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
>  
> -     if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
> -             src = kmap_atomic(page);
> +     if (clen == PAGE_SIZE) {
> +             unsigned char *src = kmap_atomic(page);
>               copy_page(cmem, src);
>               kunmap_atomic(src);
>       } else {
> -             memcpy(cmem, src, clen);
> +             memcpy(cmem, zstrm->buffer, clen);
>       }
>  
>       zcomp_strm_release(zram->comp, zstrm);
> @@ -765,8 +706,6 @@ static int zram_bvec_write(struct zram *zram, struct 
> bio_vec *bvec, u32 index,
>  out:
>       if (zstrm)
>               zcomp_strm_release(zram->comp, zstrm);
> -     if (is_partial_io(bvec))
> -             kfree(uncmem);
>       return ret;
>  }
>  
> @@ -1242,24 +1181,12 @@ static int zram_add(void)
>        * and n*PAGE_SIZED sized I/O requests.
>        */
>       blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
> -     blk_queue_logical_block_size(zram->disk->queue,
> -                                     ZRAM_LOGICAL_BLOCK_SIZE);
> +     blk_queue_logical_block_size(zram->disk->queue, PAGE_SIZE);
>       blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
>       blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
>       zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
>       blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
> -     /*
> -      * zram_bio_discard() will clear all logical blocks if logical block
> -      * size is identical with physical block size(PAGE_SIZE). But if it is
> -      * different, we will skip discarding some parts of logical blocks in
> -      * the part of the request range which isn't aligned to physical block
> -      * size.  So we can't ensure that all discarded logical blocks are
> -      * zeroed.
> -      */
> -     if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
> -             zram->disk->queue->limits.discard_zeroes_data = 1;
> -     else
> -             zram->disk->queue->limits.discard_zeroes_data = 0;
> +     zram->disk->queue->limits.discard_zeroes_data = 1;
>       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
>  
>       add_disk(zram->disk);
> diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
> index 9b296c4..0c89c6d 100644
> --- a/drivers/block/zram/zram_drv.h
> +++ b/drivers/block/zram/zram_drv.h
> @@ -39,12 +39,11 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
>  #define SECTOR_SHIFT         9
>  #define SECTORS_PER_PAGE_SHIFT       (PAGE_SHIFT - SECTOR_SHIFT)
>  #define SECTORS_PER_PAGE     (1 << SECTORS_PER_PAGE_SHIFT)
> -#define ZRAM_LOGICAL_BLOCK_SHIFT 12
> +#define ZRAM_LOGICAL_BLOCK_SHIFT     PAGE_SHIFT
>  #define ZRAM_LOGICAL_BLOCK_SIZE      (1 << ZRAM_LOGICAL_BLOCK_SHIFT)
>  #define ZRAM_SECTOR_PER_LOGICAL_BLOCK        \
>       (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
>  
> -
>  /*
>   * The lower ZRAM_FLAG_SHIFT bits of table.value is for
>   * object size (excluding header), the higher bits is for
> -- 
> 2.6.4
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to